From 400c94de6700b07d9f3a0fd8ab45d395525e35aa Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Tue, 16 May 2023 17:37:29 -0400 Subject: [PATCH 01/14] use tbb::concurrent_vector for multithreaded use of matrix_cl types --- stan/math/opencl/copy.hpp | 11 +- stan/math/opencl/kernel_cl.hpp | 12 +- stan/math/opencl/matrix_cl.hpp | 29 +++- stan/math/prim/fun/vec_concat.hpp | 6 +- .../math/opencl/rev/normal_lccdf_test.cpp | 126 ++---------------- 5 files changed, 50 insertions(+), 134 deletions(-) diff --git a/stan/math/opencl/copy.hpp b/stan/math/opencl/copy.hpp index 51838ffa171..dd9382683e1 100644 --- a/stan/math/opencl/copy.hpp +++ b/stan/math/opencl/copy.hpp @@ -98,9 +98,10 @@ inline auto from_matrix_cl(const T& src) { try { cl::Event copy_event; const cl::CommandQueue queue = opencl_context.queue(); + std::vector copy_write_events(src.write_events().begin(), src.write_events().end()); queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0, sizeof(T_val) * dst.size(), dst.data(), - &src.write_events(), &copy_event); + &copy_write_events, &copy_event); copy_event.wait(); src.clear_write_events(); } catch (const cl::Error& e) { @@ -151,8 +152,9 @@ inline T_dst from_matrix_cl(const matrix_cl& src) { try { cl::Event copy_event; const cl::CommandQueue queue = opencl_context.queue(); + std::vector copy_write_events(src.write_events().begin(), src.write_events().end()); queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0, - sizeof(T), &dst, &src.write_events(), &copy_event); + sizeof(T), &dst, &copy_write_events, &copy_event); copy_event.wait(); src.clear_write_events(); } catch (const cl::Error& e) { @@ -183,9 +185,10 @@ inline T_dst from_matrix_cl(const matrix_cl& src) { try { cl::Event copy_event; const cl::CommandQueue queue = opencl_context.queue(); + std::vector copy_write_events(src.write_events().begin(), src.write_events().end()); queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0, sizeof(T) * src.rows(), dst.data(), - &src.write_events(), 
&copy_event); + &copy_write_events, &copy_event); copy_event.wait(); src.clear_write_events(); } catch (const cl::Error& e) { @@ -257,7 +260,7 @@ inline auto packed_copy(const T& src) { packed, src, src.rows(), src.rows(), src.view()); const std::vector mat_events - = vec_concat(packed.read_write_events(), src.write_events()); + = vec_concat(std::vector{}, packed.read_write_events(), src.write_events()); cl::Event copy_event; queue.enqueueReadBuffer(packed.buffer(), opencl_context.in_order(), 0, sizeof(T_val) * packed_size, dst.data(), diff --git a/stan/math/opencl/kernel_cl.hpp b/stan/math/opencl/kernel_cl.hpp index de2706cccd6..d3b28bbe22c 100644 --- a/stan/math/opencl/kernel_cl.hpp +++ b/stan/math/opencl/kernel_cl.hpp @@ -109,17 +109,17 @@ inline void assign_events(const cl::Event& new_event, CallArg& m, * @return A vector of OpenCL events. */ template * = nullptr> -inline std::vector select_events(const T& m) { - return {}; +inline tbb::concurrent_vector select_events(const T& m) { + return tbb::concurrent_vector{}; } template * = nullptr, require_same_t* = nullptr> -inline const std::vector& select_events(const K& m) { +inline const tbb::concurrent_vector& select_events(const K& m) { return m.write_events(); } template * = nullptr, require_any_same_t* = nullptr> -inline std::vector select_events(K& m) { +inline tbb::concurrent_vector select_events(K& m) { static_assert(!std::is_const::value, "Can not write to const matrix_cl!"); return m.read_write_events(); } @@ -205,7 +205,7 @@ struct kernel_cl { opencl_context.register_kernel_cache(&kernel_); } cl::EnqueueArgs eargs(opencl_context.queue(), - vec_concat(internal::select_events(args)...), + vec_concat(std::vector{}, internal::select_events(args)...), global_thread_size); cl::KernelFunctor&...> kernel_functor( kernel_); @@ -232,7 +232,7 @@ struct kernel_cl { opencl_context.register_kernel_cache(&kernel_); } cl::EnqueueArgs eargs(opencl_context.queue(), - vec_concat(internal::select_events(args)...), + 
vec_concat(std::vector{}, internal::select_events(args)...), global_thread_size, thread_block_size); cl::KernelFunctor&...> kernel_functor( kernel_); diff --git a/stan/math/opencl/matrix_cl.hpp b/stan/math/opencl/matrix_cl.hpp index 0ea5e84ba35..616ea1daf92 100644 --- a/stan/math/opencl/matrix_cl.hpp +++ b/stan/math/opencl/matrix_cl.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -50,8 +51,8 @@ class matrix_cl : public matrix_cl_base { int cols_{0}; // Number of columns. // Holds info on if matrix is a special type matrix_cl_view view_{matrix_cl_view::Entire}; - mutable std::vector write_events_; // Tracks write jobs - mutable std::vector read_events_; // Tracks reads + mutable tbb::concurrent_vector write_events_; // Tracks write jobs + mutable tbb::concurrent_vector read_events_; // Tracks reads public: using Scalar = T; // Underlying type of the matrix @@ -99,7 +100,7 @@ class matrix_cl : public matrix_cl_base { * Get the events from the event stacks. * @return The write event stack. */ - inline const std::vector& write_events() const { + inline const tbb::concurrent_vector& write_events() const { return write_events_; } @@ -107,7 +108,7 @@ class matrix_cl : public matrix_cl_base { * Get the events from the event stacks. * @return The read/write event stack. */ - inline const std::vector& read_events() const { + inline const tbb::concurrent_vector& read_events() const { return read_events_; } @@ -115,7 +116,7 @@ class matrix_cl : public matrix_cl_base { * Get the events from the event stacks. * @return The read/write event stack. 
*/ - inline const std::vector read_write_events() const { + inline const tbb::concurrent_vector read_write_events() const { return vec_concat(this->read_events(), this->write_events()); } @@ -615,15 +616,29 @@ class matrix_cl : public matrix_cl_base { * @param A matrix_cl */ void initialize_buffer_cl(const matrix_cl& A) { + cl::Event cstr_event; + std::vector* dep_events = + new std::vector(A.write_events().begin(), + A.write_events().end()); try { - cl::Event cstr_event; opencl_context.queue().enqueueCopyBuffer(A.buffer(), this->buffer(), 0, 0, A.size() * sizeof(T), - &A.write_events(), &cstr_event); + dep_events, &cstr_event); + if (opencl_context.device()[0].getInfo()) { + buffer_cl_.setDestructorCallback( + &delete_it_destructor>, dep_events); + } else { + cstr_event.setCallback(CL_COMPLETE, + &delete_it_event>, dep_events); + } this->add_write_event(cstr_event); A.add_read_event(cstr_event); } catch (const cl::Error& e) { + delete dep_events; check_opencl_error("copy (OpenCL)->(OpenCL)", e); + } catch (...) { + delete dep_events; + throw; } } diff --git a/stan/math/prim/fun/vec_concat.hpp b/stan/math/prim/fun/vec_concat.hpp index ed8a5df0c21..67cd6f5b599 100644 --- a/stan/math/prim/fun/vec_concat.hpp +++ b/stan/math/prim/fun/vec_concat.hpp @@ -37,7 +37,9 @@ inline void append_vectors(VecInOut& x) {} template inline void append_vectors(VecInOut& x, const VecIn& y, const VecArgs&... args) { - x.insert(x.end(), y.begin(), y.end()); + for (auto& yy : y) { + x.push_back(yy); + } append_vectors(x, args...); } } // namespace internal @@ -53,7 +55,7 @@ inline void append_vectors(VecInOut& x, const VecIn& y, */ template inline auto vec_concat(const Vec& v1, const Args&... 
args) { - std::vector> vec; + Vec vec; vec.reserve(internal::sum_vector_sizes(v1, args...)); internal::append_vectors(vec, v1, args...); return vec; diff --git a/test/unit/math/opencl/rev/normal_lccdf_test.cpp b/test/unit/math/opencl/rev/normal_lccdf_test.cpp index e03341403ed..dbb85275790 100644 --- a/test/unit/math/opencl/rev/normal_lccdf_test.cpp +++ b/test/unit/math/opencl/rev/normal_lccdf_test.cpp @@ -5,139 +5,35 @@ #include #include -TEST(ProbDistributionsNormalLccdf, error_checking) { - int N = 3; - - Eigen::VectorXd y(N); - y << 0.3, 0.8, 1.0; - Eigen::VectorXd y_size(N - 1); - y_size << 0.3, 0.8; - Eigen::VectorXd y_value(N); - y_value << 0.3, NAN, 0.5; - - Eigen::VectorXd mu(N); - mu << 0.3, 0.8, 1.0; - Eigen::VectorXd mu_size(N - 1); - mu_size << 0.3, 0.8; - Eigen::VectorXd mu_value(N); - mu_value << 0.3, -INFINITY, 0.5; - - Eigen::VectorXd sigma(N); - sigma << 0.3, 0.8, 1.0; - Eigen::VectorXd sigma_size(N - 1); - sigma_size << 0.3, 0.8; - Eigen::VectorXd sigma_value(N); - sigma_value << 0.3, 0, 0.5; - - stan::math::matrix_cl y_cl(y); - stan::math::matrix_cl y_size_cl(y_size); - stan::math::matrix_cl y_value_cl(y_value); - stan::math::matrix_cl mu_cl(mu); - stan::math::matrix_cl mu_size_cl(mu_size); - stan::math::matrix_cl mu_value_cl(mu_value); - stan::math::matrix_cl sigma_cl(sigma); - stan::math::matrix_cl sigma_size_cl(sigma_size); - stan::math::matrix_cl sigma_value_cl(sigma_value); - - EXPECT_NO_THROW(stan::math::normal_lccdf(y_cl, mu_cl, sigma_cl)); - - EXPECT_THROW(stan::math::normal_lccdf(y_size_cl, mu_cl, sigma_cl), - std::invalid_argument); - EXPECT_THROW(stan::math::normal_lccdf(y_cl, mu_size_cl, sigma_cl), - std::invalid_argument); - EXPECT_THROW(stan::math::normal_lccdf(y_cl, mu_cl, sigma_size_cl), - std::invalid_argument); - - EXPECT_THROW(stan::math::normal_lccdf(y_value_cl, mu_cl, sigma_cl), - std::domain_error); - EXPECT_THROW(stan::math::normal_lccdf(y_cl, mu_value_cl, sigma_cl), - std::domain_error); - 
EXPECT_THROW(stan::math::normal_lccdf(y_cl, mu_cl, sigma_value_cl), - std::domain_error); -} auto normal_lccdf_functor = [](const auto& y, const auto& mu, const auto& sigma) { return stan::math::normal_lccdf(y, mu, sigma); }; -TEST(ProbDistributionsNormalLccdf, opencl_matches_cpu_small) { - int N = 3; - int M = 2; - Eigen::VectorXd y(N); - y << 0.3, 0.8, 1.0; - Eigen::VectorXd mu(N); - mu << -0.3, -0.8, 1.01; - Eigen::VectorXd sigma(N); - sigma << 0.3, 0.1, 1.0; - - stan::math::test::compare_cpu_opencl_prim_rev(normal_lccdf_functor, y, mu, - sigma); - stan::math::test::compare_cpu_opencl_prim_rev( - normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), - sigma.transpose().eval()); -} - -TEST(ProbDistributionsNormalLccdf, opencl_broadcast_y) { - int N = 3; - - double y_scal = 12.3; - Eigen::VectorXd mu(N); - mu << 0.5, 1.2, 1.0; - Eigen::VectorXd sigma(N); - sigma << 0.3, 0.8, 1.0; - - stan::math::test::test_opencl_broadcasting_prim_rev<0>(normal_lccdf_functor, - y_scal, mu, sigma); - stan::math::test::test_opencl_broadcasting_prim_rev<0>( - normal_lccdf_functor, y_scal, mu.transpose().eval(), sigma); -} - -TEST(ProbDistributionsNormalLccdf, opencl_broadcast_mu) { - int N = 3; - - Eigen::VectorXd y(N); - y << 0.3, 0.8, 1.0; - double mu_scal = 12.3; - Eigen::VectorXd sigma(N); - sigma << 0.3, 0.8, 1.0; - - stan::math::test::test_opencl_broadcasting_prim_rev<1>(normal_lccdf_functor, - y, mu_scal, sigma); - stan::math::test::test_opencl_broadcasting_prim_rev<1>( - normal_lccdf_functor, y.transpose().eval(), mu_scal, sigma); -} - -TEST(ProbDistributionsNormalLccdf, opencl_broadcast_sigma) { - int N = 3; - - Eigen::VectorXd y(N); - y << 0.3, 0.8, 1.0; - Eigen::VectorXd mu(N); - mu << 0.3, 0.8, 1.0; - double sigma_scal = 12.3; - - stan::math::test::test_opencl_broadcasting_prim_rev<2>(normal_lccdf_functor, - y, mu, sigma_scal); - stan::math::test::test_opencl_broadcasting_prim_rev<2>( - normal_lccdf_functor, y.transpose().eval(), mu, sigma_scal); -} 
TEST(ProbDistributionsNormalLccdf, opencl_matches_cpu_big) { int N = 153; - Eigen::Matrix y - = Eigen::Array::Random(N, 1).abs(); +std::srand(123); +for (int i = 0; i < 10; ++i) { Eigen::Matrix mu - = Eigen::Array::Random(N, 1).abs(); + = Eigen::Array::Random(N, 1) + 1.0; Eigen::Matrix sigma = Eigen::Array::Random(N, 1).abs() + 0.01; - + Eigen::Matrix y = (mu.array() * sigma.array()).matrix(); + std::cout << "Iter: " << i << " mu, sigma, y" << std::endl; + for (int j = 0; j < N; j++) { + std::cout << mu(j) << ", " << sigma(j) << ", " << y(j) << std::endl; + } + std::cout << "-----------compare_cpu_opencl_prim_rev" << std::endl; stan::math::test::compare_cpu_opencl_prim_rev(normal_lccdf_functor, y, mu, sigma); + std::cout << "-----------compare_cpu_opencl_prim_rev transpose" << std::endl; stan::math::test::compare_cpu_opencl_prim_rev( normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), sigma.transpose().eval()); } +} #endif From 4e1f542e84b59dc7d2964444cb528062f6047a9c Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Wed, 17 May 2023 10:14:22 -0400 Subject: [PATCH 02/14] Fix normal_lccdf for opencl so that LOG_HALF is added N times to the result --- stan/math/opencl/prim/normal_lccdf.hpp | 6 +- .../math/opencl/rev/normal_lccdf_test.cpp | 193 ++++++++++++++++-- 2 files changed, 175 insertions(+), 24 deletions(-) diff --git a/stan/math/opencl/prim/normal_lccdf.hpp b/stan/math/opencl/prim/normal_lccdf.hpp index e9e7f97c079..68ce6bdbb20 100644 --- a/stan/math/opencl/prim/normal_lccdf.hpp +++ b/stan/math/opencl/prim/normal_lccdf.hpp @@ -64,12 +64,12 @@ return_type_t normal_lccdf( auto sigma_positive_expr = 0 < sigma_val; auto scaled_diff = elt_divide(y_val - mu_val, sigma_val * SQRT_TWO); - auto one_m_erf = select( + matrix_cl one_m_erf = select( scaled_diff < -37.5 * INV_SQRT_TWO, 2.0, select(scaled_diff < -5.0 * INV_SQRT_TWO, 2.0 - erfc(-scaled_diff), select(scaled_diff > 8.25 * INV_SQRT_TWO, 0.0, 1.0 - erf(scaled_diff)))); - auto lccdf_expr = 
colwise_sum(log(one_m_erf)); + auto lccdf_expr = log(one_m_erf); auto mu_deriv = select(scaled_diff > 8.25 * INV_SQRT_TWO, INFTY, SQRT_TWO_OVER_SQRT_PI * elt_divide(exp(-square(scaled_diff)), @@ -89,7 +89,7 @@ return_type_t normal_lccdf( calc_if::value>(mu_deriv), calc_if::value>(sigma_deriv)); - T_partials_return lccdf = LOG_HALF + sum(from_matrix_cl(lccdf_cl)); + T_partials_return lccdf = LOG_HALF * lccdf_cl.size() + sum(from_matrix_cl(lccdf_cl)); auto ops_partials = make_partials_propagator(y_col, mu_col, sigma_col); diff --git a/test/unit/math/opencl/rev/normal_lccdf_test.cpp b/test/unit/math/opencl/rev/normal_lccdf_test.cpp index dbb85275790..000814cb67e 100644 --- a/test/unit/math/opencl/rev/normal_lccdf_test.cpp +++ b/test/unit/math/opencl/rev/normal_lccdf_test.cpp @@ -5,35 +5,186 @@ #include #include +namespace exp_mod_normal_lccdf_test { -auto normal_lccdf_functor - = [](const auto& y, const auto& mu, const auto& sigma) { - return stan::math::normal_lccdf(y, mu, sigma); +TEST(ProbDistributionsDoubleExpModNormalLccdf, error_checking) { + int N = 3; + + Eigen::VectorXd y(N); + y << 0.3, 0.8, 1.0; + Eigen::VectorXd y_size(N - 1); + y_size << 0.3, 0.8; + Eigen::VectorXd y_value(N); + y_value << 0.3, NAN, 0.5; + + Eigen::VectorXd mu(N); + mu << 0.3, 0.8, 1.0; + Eigen::VectorXd mu_size(N - 1); + mu_size << 0.3, 0.8; + Eigen::VectorXd mu_value(N); + mu_value << 0.3, -INFINITY, 0.5; + + Eigen::VectorXd sigma(N); + sigma << 0.3, 0.8, 1.0; + Eigen::VectorXd sigma_size(N - 1); + sigma_size << 0.3, 0.8; + Eigen::VectorXd sigma_value(N); + sigma_value << 0.3, 0, 0.5; + + Eigen::VectorXd lambda(N); + lambda << 0.4, 0.4, 1.4; + Eigen::VectorXd lambda_size(N - 1); + lambda_size << 0.3, 0.8; + Eigen::VectorXd lambda_value(N); + lambda_value << 0.3, 0, 0.5; + + stan::math::matrix_cl y_cl(y); + stan::math::matrix_cl y_size_cl(y_size); + stan::math::matrix_cl y_value_cl(y_value); + stan::math::matrix_cl mu_cl(mu); + stan::math::matrix_cl mu_size_cl(mu_size); + 
stan::math::matrix_cl mu_value_cl(mu_value); + stan::math::matrix_cl sigma_cl(sigma); + stan::math::matrix_cl sigma_size_cl(sigma_size); + stan::math::matrix_cl sigma_value_cl(sigma_value); + stan::math::matrix_cl lambda_cl(lambda); + stan::math::matrix_cl lambda_size_cl(lambda_size); + stan::math::matrix_cl lambda_value_cl(lambda_value); + + EXPECT_NO_THROW( + stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_cl, lambda_cl)); + + EXPECT_THROW( + stan::math::exp_mod_normal_lccdf(y_size_cl, mu_cl, sigma_cl, lambda_cl), + std::invalid_argument); + EXPECT_THROW( + stan::math::exp_mod_normal_lccdf(y_cl, mu_size_cl, sigma_cl, lambda_cl), + std::invalid_argument); + EXPECT_THROW( + stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_size_cl, lambda_cl), + std::invalid_argument); + EXPECT_THROW( + stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_cl, lambda_size_cl), + std::invalid_argument); + + EXPECT_THROW( + stan::math::exp_mod_normal_lccdf(y_value_cl, mu_cl, sigma_cl, lambda_cl), + std::domain_error); + EXPECT_THROW( + stan::math::exp_mod_normal_lccdf(y_cl, mu_value_cl, sigma_cl, lambda_cl), + std::domain_error); + EXPECT_THROW( + stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_value_cl, lambda_cl), + std::domain_error); + EXPECT_THROW( + stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_cl, lambda_value_cl), + std::domain_error); +} + +auto exp_mod_normal_lccdf_functor + = [](const auto& y, const auto& mu, const auto& sigma, const auto& lambda) { + return stan::math::exp_mod_normal_lccdf(y, mu, sigma, lambda); }; +TEST(ProbDistributionsDoubleExpModNormalLccdf, opencl_matches_cpu_small) { + int N = 3; + int M = 2; + + Eigen::VectorXd y(N); + y << -0.3, 1.8, 1.4; + Eigen::VectorXd mu(N); + mu << 0.3, 0.8, 1.0; + Eigen::VectorXd sigma(N); + sigma << 0.3, 0.8, 1.0; + Eigen::VectorXd lambda(N); + lambda << 0.3, 0.4, 1.1; + + stan::math::test::compare_cpu_opencl_prim_rev(exp_mod_normal_lccdf_functor, y, + mu, sigma, lambda); + 
stan::math::test::compare_cpu_opencl_prim_rev( + exp_mod_normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), + sigma.transpose().eval(), lambda.transpose().eval()); +} + +TEST(ProbDistributionsDoubleExpModNormalLccdf, + opencl_matches_cpu_small_y_pos_inf) { + int N = 3; + int M = 2; + Eigen::VectorXd y(N); + y << -0.3, 1.8, INFINITY; + Eigen::VectorXd mu(N); + mu << 0.3, 0.8, 1.0; + Eigen::VectorXd sigma(N); + sigma << 0.3, 0.8, 1.0; + Eigen::VectorXd lambda(N); + lambda << 0.3, 0.4, 1.1; -TEST(ProbDistributionsNormalLccdf, opencl_matches_cpu_big) { + stan::math::test::compare_cpu_opencl_prim_rev(exp_mod_normal_lccdf_functor, y, + mu, sigma, lambda); + stan::math::test::compare_cpu_opencl_prim_rev( + exp_mod_normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), + sigma.transpose().eval(), lambda.transpose().eval()); +} + +TEST(ProbDistributionsDoubleExpModNormalLccdf, + opencl_matches_cpu_small_y_neg_inf) { + int N = 3; + int M = 2; + + Eigen::VectorXd y(N); + y << -0.3, 1.8, -INFINITY; + Eigen::VectorXd mu(N); + mu << 0.3, 0.8, 1.0; + Eigen::VectorXd sigma(N); + sigma << 0.3, 0.8, 1.0; + Eigen::VectorXd lambda(N); + lambda << 0.3, 0.4, 1.1; + + stan::math::test::compare_cpu_opencl_prim_rev(exp_mod_normal_lccdf_functor, y, + mu, sigma, lambda); + stan::math::test::compare_cpu_opencl_prim_rev( + exp_mod_normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), + sigma.transpose().eval(), lambda.transpose().eval()); +} + +TEST(ProbDistributionsDoubleExpModNormalLccdf, opencl_broadcast_y) { + int N = 3; + + double y_scal = 12.3; + Eigen::VectorXd mu(N); + mu << 0.5, 1.2, 1.0; + Eigen::VectorXd sigma(N); + sigma << 0.3, 0.8, 1.0; + Eigen::VectorXd lambda(N); + lambda << 0.3, 0.4, 1.1; + + stan::math::test::test_opencl_broadcasting_prim_rev<0>( + exp_mod_normal_lccdf_functor, y_scal, mu, sigma, lambda); + stan::math::test::test_opencl_broadcasting_prim_rev<0>( + exp_mod_normal_lccdf_functor, y_scal, mu.transpose().eval(), sigma, + 
lambda.transpose().eval()); +} + +TEST(ProbDistributionsDoubleExpModNormalLccdf, opencl_matches_cpu_big) { int N = 153; -std::srand(123); -for (int i = 0; i < 10; ++i) { + Eigen::Matrix y + = Eigen::Array::Random(N, 1).abs(); Eigen::Matrix mu - = Eigen::Array::Random(N, 1) + 1.0; + = Eigen::Array::Random(N, 1).abs(); Eigen::Matrix sigma - = Eigen::Array::Random(N, 1).abs() + 0.01; - Eigen::Matrix y = (mu.array() * sigma.array()).matrix(); - std::cout << "Iter: " << i << " mu, sigma, y" << std::endl; - for (int j = 0; j < N; j++) { - std::cout << mu(j) << ", " << sigma(j) << ", " << y(j) << std::endl; - } - std::cout << "-----------compare_cpu_opencl_prim_rev" << std::endl; - stan::math::test::compare_cpu_opencl_prim_rev(normal_lccdf_functor, y, mu, - sigma); - std::cout << "-----------compare_cpu_opencl_prim_rev transpose" << std::endl; + = Eigen::Array::Random(N, 1).abs().array() + + 0.1; + Eigen::Matrix lambda + = Eigen::Array::Random(N, 1).abs(); + + stan::math::test::compare_cpu_opencl_prim_rev(exp_mod_normal_lccdf_functor, y, + mu, sigma, lambda); stan::math::test::compare_cpu_opencl_prim_rev( - normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), - sigma.transpose().eval()); -} + exp_mod_normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), + sigma.transpose().eval(), lambda.transpose().eval()); } -#endif +} // namespace exp_mod_normal_lccdf_test + +#endif \ No newline at end of file From 670ca4013a594f2bbc56d378a6e0226a74589262 Mon Sep 17 00:00:00 2001 From: Stan Jenkins Date: Wed, 17 May 2023 13:17:29 -0400 Subject: [PATCH 03/14] [Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1 --- stan/math/opencl/copy.hpp | 12 ++++++++---- stan/math/opencl/kernel_cl.hpp | 6 ++++-- stan/math/opencl/matrix_cl.hpp | 15 +++++++-------- stan/math/opencl/prim/normal_lccdf.hpp | 3 ++- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/stan/math/opencl/copy.hpp b/stan/math/opencl/copy.hpp index dd9382683e1..c487fab30b1 
100644 --- a/stan/math/opencl/copy.hpp +++ b/stan/math/opencl/copy.hpp @@ -98,7 +98,8 @@ inline auto from_matrix_cl(const T& src) { try { cl::Event copy_event; const cl::CommandQueue queue = opencl_context.queue(); - std::vector copy_write_events(src.write_events().begin(), src.write_events().end()); + std::vector copy_write_events(src.write_events().begin(), + src.write_events().end()); queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0, sizeof(T_val) * dst.size(), dst.data(), &copy_write_events, &copy_event); @@ -152,7 +153,8 @@ inline T_dst from_matrix_cl(const matrix_cl& src) { try { cl::Event copy_event; const cl::CommandQueue queue = opencl_context.queue(); - std::vector copy_write_events(src.write_events().begin(), src.write_events().end()); + std::vector copy_write_events(src.write_events().begin(), + src.write_events().end()); queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0, sizeof(T), &dst, &copy_write_events, &copy_event); copy_event.wait(); @@ -185,7 +187,8 @@ inline T_dst from_matrix_cl(const matrix_cl& src) { try { cl::Event copy_event; const cl::CommandQueue queue = opencl_context.queue(); - std::vector copy_write_events(src.write_events().begin(), src.write_events().end()); + std::vector copy_write_events(src.write_events().begin(), + src.write_events().end()); queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0, sizeof(T) * src.rows(), dst.data(), &copy_write_events, &copy_event); @@ -260,7 +263,8 @@ inline auto packed_copy(const T& src) { packed, src, src.rows(), src.rows(), src.view()); const std::vector mat_events - = vec_concat(std::vector{}, packed.read_write_events(), src.write_events()); + = vec_concat(std::vector{}, packed.read_write_events(), + src.write_events()); cl::Event copy_event; queue.enqueueReadBuffer(packed.buffer(), opencl_context.in_order(), 0, sizeof(T_val) * packed_size, dst.data(), diff --git a/stan/math/opencl/kernel_cl.hpp b/stan/math/opencl/kernel_cl.hpp index d3b28bbe22c..f21be61ce35 100644 --- 
a/stan/math/opencl/kernel_cl.hpp +++ b/stan/math/opencl/kernel_cl.hpp @@ -205,7 +205,8 @@ struct kernel_cl { opencl_context.register_kernel_cache(&kernel_); } cl::EnqueueArgs eargs(opencl_context.queue(), - vec_concat(std::vector{}, internal::select_events(args)...), + vec_concat(std::vector{}, + internal::select_events(args)...), global_thread_size); cl::KernelFunctor&...> kernel_functor( kernel_); @@ -232,7 +233,8 @@ struct kernel_cl { opencl_context.register_kernel_cache(&kernel_); } cl::EnqueueArgs eargs(opencl_context.queue(), - vec_concat(std::vector{}, internal::select_events(args)...), + vec_concat(std::vector{}, + internal::select_events(args)...), global_thread_size, thread_block_size); cl::KernelFunctor&...> kernel_functor( kernel_); diff --git a/stan/math/opencl/matrix_cl.hpp b/stan/math/opencl/matrix_cl.hpp index 616ea1daf92..364b09553c8 100644 --- a/stan/math/opencl/matrix_cl.hpp +++ b/stan/math/opencl/matrix_cl.hpp @@ -617,19 +617,18 @@ class matrix_cl : public matrix_cl_base { */ void initialize_buffer_cl(const matrix_cl& A) { cl::Event cstr_event; - std::vector* dep_events = - new std::vector(A.write_events().begin(), - A.write_events().end()); + std::vector* dep_events = new std::vector( + A.write_events().begin(), A.write_events().end()); try { opencl_context.queue().enqueueCopyBuffer(A.buffer(), this->buffer(), 0, 0, - A.size() * sizeof(T), - dep_events, &cstr_event); + A.size() * sizeof(T), dep_events, + &cstr_event); if (opencl_context.device()[0].getInfo()) { buffer_cl_.setDestructorCallback( - &delete_it_destructor>, dep_events); + &delete_it_destructor>, dep_events); } else { - cstr_event.setCallback(CL_COMPLETE, - &delete_it_event>, dep_events); + cstr_event.setCallback( + CL_COMPLETE, &delete_it_event>, dep_events); } this->add_write_event(cstr_event); A.add_read_event(cstr_event); diff --git a/stan/math/opencl/prim/normal_lccdf.hpp b/stan/math/opencl/prim/normal_lccdf.hpp index 68ce6bdbb20..2d6efaac977 100644 --- 
a/stan/math/opencl/prim/normal_lccdf.hpp +++ b/stan/math/opencl/prim/normal_lccdf.hpp @@ -89,7 +89,8 @@ return_type_t normal_lccdf( calc_if::value>(mu_deriv), calc_if::value>(sigma_deriv)); - T_partials_return lccdf = LOG_HALF * lccdf_cl.size() + sum(from_matrix_cl(lccdf_cl)); + T_partials_return lccdf + = LOG_HALF * lccdf_cl.size() + sum(from_matrix_cl(lccdf_cl)); auto ops_partials = make_partials_propagator(y_col, mu_col, sigma_col); From 5069528b66f71e3d0def1ce33664c04a64f67527 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Wed, 17 May 2023 13:57:09 -0400 Subject: [PATCH 04/14] new line --- stan/math/opencl/opencl_context.hpp | 3 ++- test/unit/math/opencl/rev/normal_lccdf_test.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/stan/math/opencl/opencl_context.hpp b/stan/math/opencl/opencl_context.hpp index 764051a5737..3abbbb7bb2c 100644 --- a/stan/math/opencl/opencl_context.hpp +++ b/stan/math/opencl/opencl_context.hpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -207,7 +208,7 @@ class opencl_context_base { * The API to access the methods and values in opencl_context_base */ class opencl_context { - std::vector kernel_caches_; + tbb::concurrent_vector kernel_caches_; public: opencl_context() = default; diff --git a/test/unit/math/opencl/rev/normal_lccdf_test.cpp b/test/unit/math/opencl/rev/normal_lccdf_test.cpp index 000814cb67e..677ce2a2f89 100644 --- a/test/unit/math/opencl/rev/normal_lccdf_test.cpp +++ b/test/unit/math/opencl/rev/normal_lccdf_test.cpp @@ -187,4 +187,4 @@ TEST(ProbDistributionsDoubleExpModNormalLccdf, opencl_matches_cpu_big) { } } // namespace exp_mod_normal_lccdf_test -#endif \ No newline at end of file +#endif From 48d79e76ddd33f9819355fd99f71f3a538995ea9 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Mon, 24 Jul 2023 09:57:33 -0400 Subject: [PATCH 05/14] fix opencl normal_lccdf test copy / paste error --- .../math/opencl/rev/normal_lccdf_test.cpp | 163 +++++++----------- 1 file 
changed, 58 insertions(+), 105 deletions(-) diff --git a/test/unit/math/opencl/rev/normal_lccdf_test.cpp b/test/unit/math/opencl/rev/normal_lccdf_test.cpp index 677ce2a2f89..d841b64e04b 100644 --- a/test/unit/math/opencl/rev/normal_lccdf_test.cpp +++ b/test/unit/math/opencl/rev/normal_lccdf_test.cpp @@ -5,9 +5,7 @@ #include #include -namespace exp_mod_normal_lccdf_test { - -TEST(ProbDistributionsDoubleExpModNormalLccdf, error_checking) { +TEST(ProbDistributionsNormalLccdf, error_checking) { int N = 3; Eigen::VectorXd y(N); @@ -31,13 +29,6 @@ TEST(ProbDistributionsDoubleExpModNormalLccdf, error_checking) { Eigen::VectorXd sigma_value(N); sigma_value << 0.3, 0, 0.5; - Eigen::VectorXd lambda(N); - lambda << 0.4, 0.4, 1.4; - Eigen::VectorXd lambda_size(N - 1); - lambda_size << 0.3, 0.8; - Eigen::VectorXd lambda_value(N); - lambda_value << 0.3, 0, 0.5; - stan::math::matrix_cl y_cl(y); stan::math::matrix_cl y_size_cl(y_size); stan::math::matrix_cl y_value_cl(y_value); @@ -47,126 +38,93 @@ TEST(ProbDistributionsDoubleExpModNormalLccdf, error_checking) { stan::math::matrix_cl sigma_cl(sigma); stan::math::matrix_cl sigma_size_cl(sigma_size); stan::math::matrix_cl sigma_value_cl(sigma_value); - stan::math::matrix_cl lambda_cl(lambda); - stan::math::matrix_cl lambda_size_cl(lambda_size); - stan::math::matrix_cl lambda_value_cl(lambda_value); - - EXPECT_NO_THROW( - stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_cl, lambda_cl)); - - EXPECT_THROW( - stan::math::exp_mod_normal_lccdf(y_size_cl, mu_cl, sigma_cl, lambda_cl), - std::invalid_argument); - EXPECT_THROW( - stan::math::exp_mod_normal_lccdf(y_cl, mu_size_cl, sigma_cl, lambda_cl), - std::invalid_argument); - EXPECT_THROW( - stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_size_cl, lambda_cl), - std::invalid_argument); - EXPECT_THROW( - stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_cl, lambda_size_cl), - std::invalid_argument); - - EXPECT_THROW( - stan::math::exp_mod_normal_lccdf(y_value_cl, mu_cl, sigma_cl, 
lambda_cl), - std::domain_error); - EXPECT_THROW( - stan::math::exp_mod_normal_lccdf(y_cl, mu_value_cl, sigma_cl, lambda_cl), - std::domain_error); - EXPECT_THROW( - stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_value_cl, lambda_cl), - std::domain_error); - EXPECT_THROW( - stan::math::exp_mod_normal_lccdf(y_cl, mu_cl, sigma_cl, lambda_value_cl), - std::domain_error); + + EXPECT_NO_THROW(stan::math::normal_lccdf(y_cl, mu_cl, sigma_cl)); + + EXPECT_THROW(stan::math::normal_lccdf(y_size_cl, mu_cl, sigma_cl), + std::invalid_argument); + EXPECT_THROW(stan::math::normal_lccdf(y_cl, mu_size_cl, sigma_cl), + std::invalid_argument); + EXPECT_THROW(stan::math::normal_lccdf(y_cl, mu_cl, sigma_size_cl), + std::invalid_argument); + + EXPECT_THROW(stan::math::normal_lccdf(y_value_cl, mu_cl, sigma_cl), + std::domain_error); + EXPECT_THROW(stan::math::normal_lccdf(y_cl, mu_value_cl, sigma_cl), + std::domain_error); + EXPECT_THROW(stan::math::normal_lccdf(y_cl, mu_cl, sigma_value_cl), + std::domain_error); } -auto exp_mod_normal_lccdf_functor - = [](const auto& y, const auto& mu, const auto& sigma, const auto& lambda) { - return stan::math::exp_mod_normal_lccdf(y, mu, sigma, lambda); +auto normal_lccdf_functor + = [](const auto& y, const auto& mu, const auto& sigma) { + return stan::math::normal_lccdf(y, mu, sigma); }; -TEST(ProbDistributionsDoubleExpModNormalLccdf, opencl_matches_cpu_small) { +TEST(ProbDistributionsNormalLccdf, opencl_matches_cpu_small) { int N = 3; int M = 2; Eigen::VectorXd y(N); - y << -0.3, 1.8, 1.4; + y << 0.3, 0.8, 1.0; Eigen::VectorXd mu(N); - mu << 0.3, 0.8, 1.0; + mu << -0.3, -0.8, 1.01; Eigen::VectorXd sigma(N); - sigma << 0.3, 0.8, 1.0; - Eigen::VectorXd lambda(N); - lambda << 0.3, 0.4, 1.1; + sigma << 0.3, 0.1, 1.0; - stan::math::test::compare_cpu_opencl_prim_rev(exp_mod_normal_lccdf_functor, y, - mu, sigma, lambda); + stan::math::test::compare_cpu_opencl_prim_rev(normal_lccdf_functor, y, mu, + sigma); 
stan::math::test::compare_cpu_opencl_prim_rev( - exp_mod_normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), - sigma.transpose().eval(), lambda.transpose().eval()); + normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), + sigma.transpose().eval()); } -TEST(ProbDistributionsDoubleExpModNormalLccdf, - opencl_matches_cpu_small_y_pos_inf) { +TEST(ProbDistributionsNormalLccdf, opencl_broadcast_y) { int N = 3; - int M = 2; - Eigen::VectorXd y(N); - y << -0.3, 1.8, INFINITY; + double y_scal = 12.3; Eigen::VectorXd mu(N); - mu << 0.3, 0.8, 1.0; + mu << 0.5, 1.2, 1.0; Eigen::VectorXd sigma(N); sigma << 0.3, 0.8, 1.0; - Eigen::VectorXd lambda(N); - lambda << 0.3, 0.4, 1.1; - stan::math::test::compare_cpu_opencl_prim_rev(exp_mod_normal_lccdf_functor, y, - mu, sigma, lambda); - stan::math::test::compare_cpu_opencl_prim_rev( - exp_mod_normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), - sigma.transpose().eval(), lambda.transpose().eval()); + stan::math::test::test_opencl_broadcasting_prim_rev<0>(normal_lccdf_functor, + y_scal, mu, sigma); + stan::math::test::test_opencl_broadcasting_prim_rev<0>( + normal_lccdf_functor, y_scal, mu.transpose().eval(), sigma); } -TEST(ProbDistributionsDoubleExpModNormalLccdf, - opencl_matches_cpu_small_y_neg_inf) { +TEST(ProbDistributionsNormalLccdf, opencl_broadcast_mu) { int N = 3; - int M = 2; Eigen::VectorXd y(N); - y << -0.3, 1.8, -INFINITY; - Eigen::VectorXd mu(N); - mu << 0.3, 0.8, 1.0; + y << 0.3, 0.8, 1.0; + double mu_scal = 12.3; Eigen::VectorXd sigma(N); sigma << 0.3, 0.8, 1.0; - Eigen::VectorXd lambda(N); - lambda << 0.3, 0.4, 1.1; - stan::math::test::compare_cpu_opencl_prim_rev(exp_mod_normal_lccdf_functor, y, - mu, sigma, lambda); - stan::math::test::compare_cpu_opencl_prim_rev( - exp_mod_normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), - sigma.transpose().eval(), lambda.transpose().eval()); + stan::math::test::test_opencl_broadcasting_prim_rev<1>(normal_lccdf_functor, + 
y, mu_scal, sigma); + stan::math::test::test_opencl_broadcasting_prim_rev<1>( + normal_lccdf_functor, y.transpose().eval(), mu_scal, sigma); } -TEST(ProbDistributionsDoubleExpModNormalLccdf, opencl_broadcast_y) { +TEST(ProbDistributionsNormalLccdf, opencl_broadcast_sigma) { int N = 3; - double y_scal = 12.3; + Eigen::VectorXd y(N); + y << 0.3, 0.8, 1.0; Eigen::VectorXd mu(N); - mu << 0.5, 1.2, 1.0; - Eigen::VectorXd sigma(N); - sigma << 0.3, 0.8, 1.0; - Eigen::VectorXd lambda(N); - lambda << 0.3, 0.4, 1.1; + mu << 0.3, 0.8, 1.0; + double sigma_scal = 12.3; - stan::math::test::test_opencl_broadcasting_prim_rev<0>( - exp_mod_normal_lccdf_functor, y_scal, mu, sigma, lambda); - stan::math::test::test_opencl_broadcasting_prim_rev<0>( - exp_mod_normal_lccdf_functor, y_scal, mu.transpose().eval(), sigma, - lambda.transpose().eval()); + stan::math::test::test_opencl_broadcasting_prim_rev<2>(normal_lccdf_functor, + y, mu, sigma_scal); + stan::math::test::test_opencl_broadcasting_prim_rev<2>( + normal_lccdf_functor, y.transpose().eval(), mu, sigma_scal); } -TEST(ProbDistributionsDoubleExpModNormalLccdf, opencl_matches_cpu_big) { +TEST(ProbDistributionsNormalLccdf, opencl_matches_cpu_big) { int N = 153; Eigen::Matrix y @@ -174,17 +132,12 @@ TEST(ProbDistributionsDoubleExpModNormalLccdf, opencl_matches_cpu_big) { Eigen::Matrix mu = Eigen::Array::Random(N, 1).abs(); Eigen::Matrix sigma - = Eigen::Array::Random(N, 1).abs().array() - + 0.1; - Eigen::Matrix lambda - = Eigen::Array::Random(N, 1).abs(); + = Eigen::Array::Random(N, 1).abs() + 0.01; - stan::math::test::compare_cpu_opencl_prim_rev(exp_mod_normal_lccdf_functor, y, - mu, sigma, lambda); + stan::math::test::compare_cpu_opencl_prim_rev(normal_lccdf_functor, y, mu, + sigma); stan::math::test::compare_cpu_opencl_prim_rev( - exp_mod_normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), - sigma.transpose().eval(), lambda.transpose().eval()); + normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), + 
sigma.transpose().eval()); } -} // namespace exp_mod_normal_lccdf_test - -#endif +#endif \ No newline at end of file From 7cafa7409035252e653b13569c4c1dfbdfc02e1c Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Mon, 24 Jul 2023 10:15:06 -0400 Subject: [PATCH 06/14] use reference instead of copy for several command queue calls for OpenCL --- stan/math/opencl/copy.hpp | 10 +++++----- stan/math/opencl/opencl_context.hpp | 20 ++++++++++---------- stan/math/opencl/zeros_strict_tri.hpp | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/stan/math/opencl/copy.hpp b/stan/math/opencl/copy.hpp index c487fab30b1..e3599e5c933 100644 --- a/stan/math/opencl/copy.hpp +++ b/stan/math/opencl/copy.hpp @@ -97,7 +97,7 @@ inline auto from_matrix_cl(const T& src) { } else { try { cl::Event copy_event; - const cl::CommandQueue queue = opencl_context.queue(); + const cl::CommandQueue& queue = opencl_context.queue(); std::vector copy_write_events(src.write_events().begin(), src.write_events().end()); queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0, @@ -152,7 +152,7 @@ inline T_dst from_matrix_cl(const matrix_cl& src) { "dst.cols()", 1); try { cl::Event copy_event; - const cl::CommandQueue queue = opencl_context.queue(); + const cl::CommandQueue& queue = opencl_context.queue(); std::vector copy_write_events(src.write_events().begin(), src.write_events().end()); queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0, @@ -186,7 +186,7 @@ inline T_dst from_matrix_cl(const matrix_cl& src) { } try { cl::Event copy_event; - const cl::CommandQueue queue = opencl_context.queue(); + const cl::CommandQueue& queue = opencl_context.queue(); std::vector copy_write_events(src.write_events().begin(), src.write_events().end()); queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0, @@ -257,7 +257,7 @@ inline auto packed_copy(const T& src) { return dst; } try { - const cl::CommandQueue queue = opencl_context.queue(); + const cl::CommandQueue& 
queue = opencl_context.queue(); matrix_cl packed(packed_size, 1); stan::math::opencl_kernels::pack(cl::NDRange(src.rows(), src.rows()), packed, src, src.rows(), src.rows(), @@ -310,7 +310,7 @@ inline matrix_cl packed_copy(Vec&& src, int rows) { try { matrix_cl packed(packed_size, 1); cl::Event packed_event; - const cl::CommandQueue queue = opencl_context.queue(); + const cl::CommandQueue& queue = opencl_context.queue(); queue.enqueueWriteBuffer( packed.buffer(), opencl_context.in_order() || std::is_rvalue_reference::value, 0, diff --git a/stan/math/opencl/opencl_context.hpp b/stan/math/opencl/opencl_context.hpp index 3abbbb7bb2c..ea0269570d8 100644 --- a/stan/math/opencl/opencl_context.hpp +++ b/stan/math/opencl/opencl_context.hpp @@ -161,7 +161,7 @@ class opencl_context_base { // the device bool in_order_; // Whether to use out of order execution. // Holds Default parameter values for each Kernel. - using map_base_opts = std::map; + using map_base_opts = std::unordered_map; map_base_opts base_opts_ = {{"LOWER", static_cast(matrix_cl_view::Lower)}, {"UPPER", static_cast(matrix_cl_view::Upper)}, @@ -194,7 +194,7 @@ class opencl_context_base { } tuning_opts_; protected: - static opencl_context_base& getInstance() { + static opencl_context_base& getInstance() noexcept { static opencl_context_base instance_; return instance_; } @@ -352,7 +352,7 @@ class opencl_context { * objects. For stan, there should only be one context, queue, device, and * program with multiple kernels. */ - inline cl::Context& context() { + inline cl::Context& context() noexcept { return opencl_context_base::getInstance().context_; } /** \ingroup opencl_context_group @@ -360,13 +360,13 @@ class opencl_context { * One command queue will exist per device where * kernels are placed on the command queue and by default executed in order. 
*/ - inline cl::CommandQueue& queue() { + inline cl::CommandQueue& queue() noexcept { return opencl_context_base::getInstance().command_queue_; } /** \ingroup opencl_context_group * Returns a copy of the map of kernel defines */ - inline opencl_context_base::map_base_opts& base_opts() { + inline opencl_context_base::map_base_opts& base_opts() noexcept { return opencl_context_base::getInstance().base_opts_; } /** \ingroup opencl_context_group @@ -376,35 +376,35 @@ class opencl_context { * max workgroup of 256 would allow thread blocks of sizes (16,16), (128,2), * (8, 32), etc. */ - inline int max_thread_block_size() { + inline int max_thread_block_size() noexcept { return opencl_context_base::getInstance().max_thread_block_size_; } /** \ingroup opencl_context_group * Returns the thread block size for the Cholesky Decompositions L_11. */ - inline opencl_context_base::tuning_struct& tuning_opts() { + inline opencl_context_base::tuning_struct& tuning_opts() noexcept { return opencl_context_base::getInstance().tuning_opts_; } /** \ingroup opencl_context_group * Returns a vector containing the OpenCL device used to create the context */ - inline std::vector& device() { + inline std::vector& device() noexcept { return opencl_context_base::getInstance().device_; } /** \ingroup opencl_context_group * Returns a vector containing the OpenCL platform used to create the context */ - inline std::vector& platform() { + inline std::vector& platform() noexcept { return opencl_context_base::getInstance().platform_; } /** \ingroup opencl_context_group * Return a bool representing whether the write to the OpenCL device are * blocking */ - inline bool& in_order() { + inline bool& in_order() noexcept { return opencl_context_base::getInstance().in_order_; } diff --git a/stan/math/opencl/zeros_strict_tri.hpp b/stan/math/opencl/zeros_strict_tri.hpp index 3ff0b92842a..339f987cb59 100644 --- a/stan/math/opencl/zeros_strict_tri.hpp +++ b/stan/math/opencl/zeros_strict_tri.hpp @@ -44,7 +44,7 @@ 
inline void matrix_cl::zeros_strict_tri() try { return; } this->view_ = both(this->view_, invert(matrix_view)); - cl::CommandQueue cmdQueue = opencl_context.queue(); + cl::CommandQueue& cmdQueue = opencl_context.queue(); opencl_kernels::fill_strict_tri(cl::NDRange(this->rows(), this->cols()), *this, 0.0, this->rows(), this->cols(), matrix_view); From cab8b8767edf1a490ebb2ecb0a6ddcd25cf46508 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Mon, 24 Jul 2023 10:15:57 -0400 Subject: [PATCH 07/14] add newline --- test/unit/math/opencl/rev/normal_lccdf_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/math/opencl/rev/normal_lccdf_test.cpp b/test/unit/math/opencl/rev/normal_lccdf_test.cpp index d841b64e04b..e03341403ed 100644 --- a/test/unit/math/opencl/rev/normal_lccdf_test.cpp +++ b/test/unit/math/opencl/rev/normal_lccdf_test.cpp @@ -140,4 +140,4 @@ TEST(ProbDistributionsNormalLccdf, opencl_matches_cpu_big) { normal_lccdf_functor, y.transpose().eval(), mu.transpose().eval(), sigma.transpose().eval()); } -#endif \ No newline at end of file +#endif From 21a1c95d1462e076a38ed9ce5bc3b7ec81009fe5 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Mon, 24 Jul 2023 10:19:52 -0400 Subject: [PATCH 08/14] update headers for opencl --- stan/math/opencl/opencl_context.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stan/math/opencl/opencl_context.hpp b/stan/math/opencl/opencl_context.hpp index ea0269570d8..e2373df126d 100644 --- a/stan/math/opencl/opencl_context.hpp +++ b/stan/math/opencl/opencl_context.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include From b879a6b60c3ee5b8eef5d54568d9303cb858cf26 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Mon, 24 Jul 2023 10:37:47 -0400 Subject: [PATCH 09/14] use unordered map instead of map --- stan/math/opencl/kernel_cl.hpp | 6 ++-- stan/math/opencl/kernel_generator/append.hpp | 24 ++++++------- .../as_column_vector_or_scalar.hpp | 18 
+++++----- .../kernel_generator/block_zero_based.hpp | 18 +++++----- stan/math/opencl/kernel_generator/calc_if.hpp | 8 ++--- .../math/opencl/kernel_generator/check_cl.hpp | 8 ++--- .../kernel_generator/colwise_reduction.hpp | 4 +-- .../math/opencl/kernel_generator/constant.hpp | 4 +-- .../math/opencl/kernel_generator/indexing.hpp | 18 +++++----- stan/math/opencl/kernel_generator/load.hpp | 14 ++++---- .../kernel_generator/multi_result_kernel.hpp | 34 +++++++++---------- .../opencl/kernel_generator/opencl_code.hpp | 10 +++--- .../opencl/kernel_generator/operation_cl.hpp | 18 +++++----- .../kernel_generator/operation_cl_lhs.hpp | 6 ++-- .../kernel_generator/optional_broadcast.hpp | 6 ++-- .../opencl/kernel_generator/reduction_2d.hpp | 4 +-- .../kernel_generator/rowwise_reduction.hpp | 20 +++++------ stan/math/opencl/kernel_generator/scalar.hpp | 4 +-- stan/math/prim/err/check_flag_sundials.hpp | 2 +- stan/math/rev/core/profiling.hpp | 2 +- 20 files changed, 114 insertions(+), 114 deletions(-) diff --git a/stan/math/opencl/kernel_cl.hpp b/stan/math/opencl/kernel_cl.hpp index f21be61ce35..3ddb2c88c4a 100644 --- a/stan/math/opencl/kernel_cl.hpp +++ b/stan/math/opencl/kernel_cl.hpp @@ -135,7 +135,7 @@ inline tbb::concurrent_vector select_events(K& m) { */ inline auto compile_kernel(const char* name, const std::vector& sources, - const std::map& options) { + const std::unordered_map& options) { auto base_opts = opencl_context.base_opts(); for (auto& it : options) { if (base_opts[it.first] > it.second) { @@ -175,7 +175,7 @@ struct kernel_cl { private: const char* name_; std::vector sources_; - std::map opts_; + std::unordered_map opts_; mutable cl::Kernel kernel_; public: @@ -187,7 +187,7 @@ struct kernel_cl { * @param options The values of macros to be passed at compile time. 
*/ kernel_cl(const char* name, std::vector sources, - std::map options = {}) + std::unordered_map options = {}) : name_(name), sources_(std::move(sources)), opts_(std::move(options)) {} /** \ingroup kernel_executor_opencl diff --git a/stan/math/opencl/kernel_generator/append.hpp b/stan/math/opencl/kernel_generator/append.hpp index 9f982cccb57..337bc5764b4 100644 --- a/stan/math/opencl/kernel_generator/append.hpp +++ b/stan/math/opencl/kernel_generator/append.hpp @@ -88,8 +88,8 @@ class append_row_ : public operation_cl, * @return part of kernel with code for this and nested expressions */ inline kernel_parts get_kernel_parts( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name, bool view_handled) const { kernel_parts res{}; @@ -101,7 +101,7 @@ class append_row_ : public operation_cl, true); std::string row_index_name_b = "(" + row_index_name + " - " + var_name_ + "_first_rows)"; - std::map generated_b; + std::unordered_map generated_b; kernel_parts parts_b = this->template get_arg<1>().get_kernel_parts( generated_b, generated_all, name_gen, row_index_name_b, col_index_name, true); @@ -129,14 +129,14 @@ class append_row_ : public operation_cl, * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; this->template get_arg<0>().set_args(generated, generated_all, kernel, arg_num); - std::map generated_b; + std::unordered_map generated_b; this->template get_arg<1>().set_args(generated_b, generated_all, kernel, arg_num); kernel.setArg(arg_num++, this->template get_arg<0>().rows()); @@ -250,8 +250,8 @@ class append_col_ : public operation_cl, * @return part of kernel with code for this and nested expressions */ inline kernel_parts get_kernel_parts( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name, bool view_handled) const { kernel_parts res{}; @@ -263,7 +263,7 @@ class append_col_ : public operation_cl, true); std::string col_index_name_b = "(" + col_index_name + " - " + var_name_ + "_first_cols)"; - std::map generated_b; + std::unordered_map generated_b; kernel_parts parts_b = this->template get_arg<1>().get_kernel_parts( generated_b, generated_all, name_gen, row_index_name, col_index_name_b, true); @@ -291,14 +291,14 @@ class append_col_ : public operation_cl, * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; this->template get_arg<0>().set_args(generated, generated_all, kernel, arg_num); - std::map generated_b; + std::unordered_map generated_b; this->template get_arg<1>().set_args(generated_b, generated_all, kernel, arg_num); kernel.setArg(arg_num++, this->template get_arg<0>().cols()); diff --git a/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp b/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp index cc995636933..580d45ae906 100644 --- a/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp +++ b/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp @@ -71,8 +71,8 @@ class as_column_vector_or_scalar_ * @return part of kernel with code for this and nested expressions */ inline kernel_parts get_kernel_parts( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name, bool view_handled) const { kernel_parts res{}; @@ -82,7 +82,7 @@ class as_column_vector_or_scalar_ std::string row_index_name_arg = row_index_name; std::string col_index_name_arg = col_index_name; modify_argument_indices(row_index_name_arg, col_index_name_arg); - std::map generated2; + std::unordered_map generated2; res = this->template get_arg<0>().get_kernel_parts( generated2, generated_all, name_gen, row_index_name_arg, col_index_name_arg, view_handled); @@ -134,8 +134,8 @@ class as_column_vector_or_scalar_ * @return part of kernel with code for this expressions */ inline kernel_parts get_kernel_parts_lhs( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, 
name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name) const { if (generated.count(this) == 0) { @@ -145,7 +145,7 @@ class as_column_vector_or_scalar_ std::string row_index_name_arg = row_index_name; std::string col_index_name_arg = col_index_name; modify_argument_indices(row_index_name_arg, col_index_name_arg); - std::map generated2; + std::unordered_map generated2; kernel_parts res = this->template get_arg<0>().get_kernel_parts_lhs( generated2, generated_all, name_gen, row_index_name_arg, col_index_name_arg); @@ -185,12 +185,12 @@ class as_column_vector_or_scalar_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; - std::map generated2; + std::unordered_map generated2; this->template get_arg<0>().set_args(generated2, generated_all, kernel, arg_num); if (generated_all.count(this) == 0) { diff --git a/stan/math/opencl/kernel_generator/block_zero_based.hpp b/stan/math/opencl/kernel_generator/block_zero_based.hpp index 97afe96606c..7a11f5eb85b 100644 --- a/stan/math/opencl/kernel_generator/block_zero_based.hpp +++ b/stan/math/opencl/kernel_generator/block_zero_based.hpp @@ -101,8 +101,8 @@ class block_ * @return part of kernel with code for this and nested expressions */ inline kernel_parts get_kernel_parts( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name, bool view_handled) const { kernel_parts res{}; @@ -112,7 +112,7 @@ class block_ std::string row_index_name_arg = row_index_name; std::string col_index_name_arg = 
col_index_name; modify_argument_indices(row_index_name_arg, col_index_name_arg); - std::map generated2; + std::unordered_map generated2; res = this->template get_arg<0>().get_kernel_parts( generated2, generated_all, name_gen, row_index_name_arg, col_index_name_arg, view_handled); @@ -175,8 +175,8 @@ class block_ * @return part of kernel with code for this expressions */ inline kernel_parts get_kernel_parts_lhs( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name) const { if (generated.count(this) == 0) { @@ -186,7 +186,7 @@ class block_ std::string row_index_name_arg = row_index_name; std::string col_index_name_arg = col_index_name; modify_argument_indices(row_index_name_arg, col_index_name_arg); - std::map generated2; + std::unordered_map generated2; kernel_parts res = this->template get_arg<0>().get_kernel_parts_lhs( generated2, generated_all, name_gen, row_index_name_arg, col_index_name_arg); @@ -226,12 +226,12 @@ class block_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; - std::map generated2; + std::unordered_map generated2; this->template get_arg<0>().set_args(generated2, generated_all, kernel, arg_num); if (generated_all.count(this) == 0) { diff --git a/stan/math/opencl/kernel_generator/calc_if.hpp b/stan/math/opencl/kernel_generator/calc_if.hpp index 79475bdef3d..5903912badc 100644 --- a/stan/math/opencl/kernel_generator/calc_if.hpp +++ b/stan/math/opencl/kernel_generator/calc_if.hpp @@ -66,8 +66,8 @@ class calc_if_ */ template kernel_parts get_whole_kernel_parts( - std::map& generated, - std::map& generated_all, name_generator& ng, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& ng, const std::string& row_index_name, const std::string& col_index_name, const T_result& result) const { if (Do_Calculate) { @@ -88,8 +88,8 @@ class calc_if_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (Do_Calculate) { this->template get_arg<0>().set_args(generated, generated_all, kernel, diff --git a/stan/math/opencl/kernel_generator/check_cl.hpp b/stan/math/opencl/kernel_generator/check_cl.hpp index 2af27c85dc1..4ab50c780b5 100644 --- a/stan/math/opencl/kernel_generator/check_cl.hpp +++ b/stan/math/opencl/kernel_generator/check_cl.hpp @@ -78,8 +78,8 @@ class check_cl_ : public operation_cl_lhs, bool> { * @return part of kernel with code for this and nested expressions */ inline kernel_parts get_kernel_parts_lhs( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name) const { kernel_parts res; @@ -110,8 +110,8 @@ class check_cl_ : public operation_cl_lhs, bool> { * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { generated[this] = ""; arg_.set_args(generated, generated_all, kernel, arg_num); diff --git a/stan/math/opencl/kernel_generator/colwise_reduction.hpp b/stan/math/opencl/kernel_generator/colwise_reduction.hpp index 7a623e68f25..c553fca7297 100644 --- a/stan/math/opencl/kernel_generator/colwise_reduction.hpp +++ b/stan/math/opencl/kernel_generator/colwise_reduction.hpp @@ -96,8 +96,8 @@ class colwise_reduction */ template kernel_parts get_whole_kernel_parts( - std::map& generated, - std::map& generated_all, name_generator& ng, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& ng, const std::string& row_index_name, const std::string& col_index_name, const T_result& result) const { kernel_parts parts = derived().get_kernel_parts( diff --git a/stan/math/opencl/kernel_generator/constant.hpp b/stan/math/opencl/kernel_generator/constant.hpp index 313309e8746..14fb730bc71 100644 --- a/stan/math/opencl/kernel_generator/constant.hpp +++ b/stan/math/opencl/kernel_generator/constant.hpp @@ -80,8 +80,8 @@ class constant_ : public operation_cl, T> { * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; diff --git a/stan/math/opencl/kernel_generator/indexing.hpp b/stan/math/opencl/kernel_generator/indexing.hpp index d57cf612775..93aaa5edc36 100644 --- a/stan/math/opencl/kernel_generator/indexing.hpp +++ b/stan/math/opencl/kernel_generator/indexing.hpp @@ -92,8 +92,8 @@ class indexing_ * @return part of kernel with code for this and nested expressions */ inline kernel_parts get_kernel_parts( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name, bool view_handled) const { kernel_parts res{}; @@ -110,7 +110,7 @@ class indexing_ kernel_parts parts_col_idx = col_index.get_kernel_parts( generated, generated_all, name_gen, row_index_name, col_index_name, view_handled); - std::map generated2; + std::unordered_map generated2; kernel_parts parts_mat = mat.get_kernel_parts( generated2, generated_all, name_gen, row_index.var_name_, col_index.var_name_, false); @@ -134,8 +134,8 @@ class indexing_ * @return part of kernel with code for this expressions */ inline kernel_parts get_kernel_parts_lhs( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name) const { if (generated.count(this) == 0) { @@ -151,7 +151,7 @@ class indexing_ kernel_parts parts_col_idx = col_index.get_kernel_parts(generated, generated_all, name_gen, row_index_name, col_index_name, false); - std::map generated2; + std::unordered_map generated2; kernel_parts parts_mat = mat.get_kernel_parts_lhs(generated2, generated_all, name_gen, 
row_index.var_name_, col_index.var_name_); @@ -171,8 +171,8 @@ class indexing_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; @@ -180,7 +180,7 @@ class indexing_ arg_num); this->template get_arg<2>().set_args(generated, generated_all, kernel, arg_num); - std::map generated2; + std::unordered_map generated2; this->template get_arg<0>().set_args(generated2, generated_all, kernel, arg_num); } diff --git a/stan/math/opencl/kernel_generator/load.hpp b/stan/math/opencl/kernel_generator/load.hpp index 319557959b6..da97abbc405 100644 --- a/stan/math/opencl/kernel_generator/load.hpp +++ b/stan/math/opencl/kernel_generator/load.hpp @@ -68,8 +68,8 @@ class load_ * @return part of kernel with code for this and nested expressions */ inline kernel_parts get_kernel_parts( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name, bool view_handled) const { kernel_parts res{}; @@ -137,8 +137,8 @@ class load_ * @return part of kernel with code for this expressions */ inline kernel_parts get_kernel_parts_lhs( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name) const { if (generated_all.count(&a_) == 0) { @@ -184,8 +184,8 @@ class load_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated_all.count(&a_) == 0) { generated_all[&a_] = ""; @@ -316,7 +316,7 @@ class load_ * @param[in,out] next_id neqt unique id to use */ inline void get_unique_matrix_accesses(std::vector& uids, - std::map& id_map, + std::unordered_map& id_map, int& next_id) const { if (id_map.count(&a_) == 0) { id_map[&a_] = next_id; diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp index a82d033c5f3..992f1c3157c 100644 --- a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp +++ b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp @@ -33,7 +33,7 @@ template struct multi_result_kernel_internal { template struct inner { - static std::map, cl::Kernel> kernel_cache_; + static std::unordered_map, cl::Kernel> kernel_cache_; using next = typename multi_result_kernel_internal< N - 1, T_results...>::template inner; using T_current_result = std::remove_reference_t< @@ -127,8 +127,8 @@ struct multi_result_kernel_internal { * @return kernel parts for the kernel */ static kernel_parts generate( - std::map& generated, - std::map& generated_all, name_generator& ng, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& ng, const std::string& row_index_name, const std::string& col_index_name, const std::tuple...>& assignment_pairs) { @@ -156,8 +156,8 @@ struct multi_result_kernel_internal { * @param assignment_pairs pairs of result and expression */ static void set_args( - std::map& generated, - std::map& generated_all, cl::Kernel& kernel, + std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num, const std::tuple...>& assignment_pairs) { @@ -191,7 +191,7 @@ struct multi_result_kernel_internal { * @param 
assignment_pairs pairs of result and expression */ static void get_unique_matrix_accesses( - std::vector& uids, std::map& id_map, + std::vector& uids, std::unordered_map& id_map, int& next_id, const std::tuple...>& assignment_pairs) { @@ -220,8 +220,8 @@ struct multi_result_kernel_internal<-1, T_results...> { assignment_pairs) {} static kernel_parts generate( - std::map& generated, - std::map& generated_all, name_generator& ng, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& ng, const std::string& row_index_name, const std::string& col_index_name, const std::tuple...>& assignment_pairs) { @@ -229,8 +229,8 @@ struct multi_result_kernel_internal<-1, T_results...> { } static void set_args( - std::map& generated, - std::map& generated_all, cl::Kernel& kernel, + std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num, const std::tuple...>& assignment_pairs) {} @@ -240,7 +240,7 @@ struct multi_result_kernel_internal<-1, T_results...> { assignment_pairs) {} static void get_unique_matrix_accesses( - std::vector& uids, std::map& id_map, + std::vector& uids, std::unordered_map& id_map, int& next_id, const std::tuple...>& assignment_pairs) {} @@ -249,7 +249,7 @@ struct multi_result_kernel_internal<-1, T_results...> { template template -std::map, cl::Kernel> multi_result_kernel_internal< +std::unordered_map, cl::Kernel> multi_result_kernel_internal< N, T_results...>::inner::kernel_cache_; } // namespace internal @@ -380,8 +380,8 @@ class results_cl { {std::decay_t::Deriv::require_specific_local_size...}); name_generator ng; - std::map generated; - std::map generated_all; + std::unordered_map generated; + std::unordered_map generated_all; kernel_parts parts = impl::generate(generated, generated_all, ng, "i", "j", assignment_pairs); std::string src; @@ -470,7 +470,7 @@ class results_cl { } std::vector uids; - std::map id_map; + std::unordered_map id_map; int next_id = 0; 
impl::get_unique_matrix_accesses(uids, id_map, next_id, assignment_pairs); @@ -486,8 +486,8 @@ class results_cl { cl::Kernel& kernel = impl::kernel_cache_[uids]; int arg_num = 0; - std::map generated; - std::map generated_all; + std::unordered_map generated; + std::unordered_map generated_all; impl::set_args(generated, generated_all, kernel, arg_num, assignment_pairs); diff --git a/stan/math/opencl/kernel_generator/opencl_code.hpp b/stan/math/opencl/kernel_generator/opencl_code.hpp index 23444da1668..22464b633b1 100644 --- a/stan/math/opencl/kernel_generator/opencl_code.hpp +++ b/stan/math/opencl/kernel_generator/opencl_code.hpp @@ -181,8 +181,8 @@ class opencl_code_ : public operation_cl_base { * @param view_handled whether caller already handled matrix view * @return part of kernel with code for this and nested expressions */ - auto get_kernel_parts(std::map& generated, - std::map& generated_all, + auto get_kernel_parts(std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name, @@ -202,8 +202,8 @@ class opencl_code_ : public operation_cl_base { * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - auto set_args(std::map& generated, - std::map& generated_all, + auto set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { return impl_->set_args(generated, generated_all, kernel, arg_num); } @@ -300,7 +300,7 @@ class opencl_code_ : public operation_cl_base { * @param[in,out] next_id neqt unique id to use */ auto get_unique_matrix_accesses(std::vector& uids, - std::map& id_map, + std::unordered_map& id_map, int& next_id) const { return impl_->get_unique_matrix_accesses(uids, id_map, next_id); } diff --git a/stan/math/opencl/kernel_generator/operation_cl.hpp b/stan/math/opencl/kernel_generator/operation_cl.hpp index ef2d4977c97..dbfe0f95b5d 100644 --- a/stan/math/opencl/kernel_generator/operation_cl.hpp +++ b/stan/math/opencl/kernel_generator/operation_cl.hpp @@ -193,8 +193,8 @@ class operation_cl : public operation_cl_base { */ template kernel_parts get_whole_kernel_parts( - std::map& generated, - std::map& generated_all, name_generator& ng, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& ng, const std::string& row_index_name, const std::string& col_index_name, const T_result& result) const { kernel_parts parts = derived().get_kernel_parts( @@ -219,8 +219,8 @@ class operation_cl : public operation_cl_base { * @return part of kernel with code for this and nested expressions */ inline kernel_parts get_kernel_parts( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name, bool view_handled) const { kernel_parts res{}; @@ -231,7 +231,7 @@ class operation_cl : public operation_cl_base { std::string col_index_name_arg = col_index_name; derived().modify_argument_indices(row_index_name_arg, col_index_name_arg); std::array args_parts = index_apply([&](auto... 
Is) { - std::map generated2; + std::unordered_map generated2; return std::array{this->get_arg().get_kernel_parts( &Derived::modify_argument_indices == &operation_cl::modify_argument_indices @@ -293,8 +293,8 @@ class operation_cl : public operation_cl_base { * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; @@ -304,7 +304,7 @@ class operation_cl : public operation_cl_base { // initializer_list from. Cast to voids avoids warnings about unused // expression. index_apply([&](auto... Is) { - std::map generated2; + std::unordered_map generated2; static_cast(std::initializer_list{ (this->get_arg().set_args( &Derived::modify_argument_indices @@ -434,7 +434,7 @@ class operation_cl : public operation_cl_base { * @param[in,out] next_id neqt unique id to use */ inline void get_unique_matrix_accesses(std::vector& uids, - std::map& id_map, + std::unordered_map& id_map, int& next_id) const { index_apply([&](auto... 
Is) { static_cast(std::initializer_list{( diff --git a/stan/math/opencl/kernel_generator/operation_cl_lhs.hpp b/stan/math/opencl/kernel_generator/operation_cl_lhs.hpp index 3410dd24eb4..eb0ccc0212e 100644 --- a/stan/math/opencl/kernel_generator/operation_cl_lhs.hpp +++ b/stan/math/opencl/kernel_generator/operation_cl_lhs.hpp @@ -47,8 +47,8 @@ class operation_cl_lhs : public operation_cl, * @return part of kernel with code for this expressions */ inline kernel_parts get_kernel_parts_lhs( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name) const { if (generated.count(this) == 0) { @@ -59,7 +59,7 @@ class operation_cl_lhs : public operation_cl, std::string col_index_name_arg = col_index_name; derived().modify_argument_indices(row_index_name_arg, col_index_name_arg); std::array args_parts = index_apply([&](auto... Is) { - std::map generated2; + std::unordered_map generated2; return std::array{ this->template get_arg().get_kernel_parts_lhs( &Derived::modify_argument_indices diff --git a/stan/math/opencl/kernel_generator/optional_broadcast.hpp b/stan/math/opencl/kernel_generator/optional_broadcast.hpp index 722c0dc661f..0cd3f7fbf90 100644 --- a/stan/math/opencl/kernel_generator/optional_broadcast.hpp +++ b/stan/math/opencl/kernel_generator/optional_broadcast.hpp @@ -100,12 +100,12 @@ class optional_broadcast_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; - std::map generated2; + std::unordered_map generated2; this->template get_arg<0>().set_args(generated2, generated_all, kernel, arg_num); if (Colwise) { diff --git a/stan/math/opencl/kernel_generator/reduction_2d.hpp b/stan/math/opencl/kernel_generator/reduction_2d.hpp index ed3131a888b..a7e10bd65c7 100644 --- a/stan/math/opencl/kernel_generator/reduction_2d.hpp +++ b/stan/math/opencl/kernel_generator/reduction_2d.hpp @@ -80,8 +80,8 @@ class reduction_2d */ template kernel_parts get_whole_kernel_parts( - std::map& generated, - std::map& generated_all, name_generator& ng, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& ng, const std::string& row_index_name, const std::string& col_index_name, const T_result& result) const { kernel_parts parts = derived().get_kernel_parts( diff --git a/stan/math/opencl/kernel_generator/rowwise_reduction.hpp b/stan/math/opencl/kernel_generator/rowwise_reduction.hpp index 5ff44dd1fff..a74dac4434c 100644 --- a/stan/math/opencl/kernel_generator/rowwise_reduction.hpp +++ b/stan/math/opencl/kernel_generator/rowwise_reduction.hpp @@ -31,8 +31,8 @@ struct matvec_mul_opt { static matrix_cl_view view(const Arg&) { return matrix_cl_view::Entire; } static kernel_parts get_kernel_parts( - const Arg& a, std::map& generated, - std::map& generated_all, + const Arg& a, std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name) { return {}; @@ -71,8 +71,8 @@ struct matvec_mul_opt>> { * @return part of kernel with code for this and nested expressions */ static kernel_parts get_kernel_parts( - const Arg& mul, std::map& generated, - std::map& generated_all, 
+ const Arg& mul, std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name) { kernel_parts res{}; @@ -154,8 +154,8 @@ class rowwise_reduction * @return part of kernel with code for this and nested expressions */ inline kernel_parts get_kernel_parts( - std::map& generated, - std::map& generated_all, + std::unordered_map& generated, + std::unordered_map& generated_all, name_generator& name_gen, const std::string& row_index_name, const std::string& col_index_name, bool view_handled) const { kernel_parts res{}; @@ -163,7 +163,7 @@ class rowwise_reduction this->var_name_ = name_gen.generate(); generated[this] = ""; - std::map generated2; + std::unordered_map generated2; if (PassZero && internal::matvec_mul_opt::is_possible) { res = internal::matvec_mul_opt::get_kernel_parts( this->template get_arg<0>(), generated2, generated_all, name_gen, @@ -245,12 +245,12 @@ class rowwise_reduction * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; - std::map generated2; + std::unordered_map generated2; this->template get_arg<0>().set_args(generated2, generated_all, kernel, arg_num); kernel.setArg(arg_num++, this->template get_arg<0>().view()); diff --git a/stan/math/opencl/kernel_generator/scalar.hpp b/stan/math/opencl/kernel_generator/scalar.hpp index 91476cb1230..fa96025f3c3 100644 --- a/stan/math/opencl/kernel_generator/scalar.hpp +++ b/stan/math/opencl/kernel_generator/scalar.hpp @@ -72,8 +72,8 @@ class scalar_ : public operation_cl, T> { * @param[in,out] arg_num consecutive number of the first argument to set. 
* This is incremented for each argument set by this function. */ - inline void set_args(std::map& generated, - std::map& generated_all, + inline void set_args(std::unordered_map& generated, + std::unordered_map& generated_all, cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; diff --git a/stan/math/prim/err/check_flag_sundials.hpp b/stan/math/prim/err/check_flag_sundials.hpp index 72f8e9a5c8b..85f10781f3f 100644 --- a/stan/math/prim/err/check_flag_sundials.hpp +++ b/stan/math/prim/err/check_flag_sundials.hpp @@ -16,7 +16,7 @@ namespace math { /** * Map cvodes error flag to acutally error msg. The most frequent - * errors are put at the top. An alternative would be to use std::map + * errors are put at the top. An alternative would be to use std::unordered_map * but in our case the difference would be negligible. Note that we * don't use CVGetReturnFlagName function to retrieve the constant * because sanitizer indicates it contains mem leak. diff --git a/stan/math/rev/core/profiling.hpp b/stan/math/rev/core/profiling.hpp index ea51aa79499..57d1758f2fa 100644 --- a/stan/math/rev/core/profiling.hpp +++ b/stan/math/rev/core/profiling.hpp @@ -115,7 +115,7 @@ class profile_info { using profile_key = std::pair; -using profile_map = std::map; +using profile_map = std::unordered_map; /** * Profiles C++ lines where the object is in scope. 
From 8a78efdd1f25472eab14d2dd8b7af065ef06c23c Mon Sep 17 00:00:00 2001 From: Stan Jenkins Date: Mon, 24 Jul 2023 10:39:01 -0400 Subject: [PATCH 10/14] [Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1 --- stan/math/opencl/kernel_cl.hpp | 6 +++--- stan/math/opencl/kernel_generator/append.hpp | 14 ++++++++------ .../as_column_vector_or_scalar.hpp | 7 ++++--- .../kernel_generator/block_zero_based.hpp | 7 ++++--- stan/math/opencl/kernel_generator/calc_if.hpp | 13 +++++++------ .../math/opencl/kernel_generator/check_cl.hpp | 7 ++++--- .../kernel_generator/colwise_reduction.hpp | 6 +++--- .../math/opencl/kernel_generator/constant.hpp | 7 ++++--- .../math/opencl/kernel_generator/indexing.hpp | 7 ++++--- stan/math/opencl/kernel_generator/load.hpp | 13 +++++++------ .../kernel_generator/multi_result_kernel.hpp | 18 ++++++++++-------- .../opencl/kernel_generator/opencl_code.hpp | 11 +++++------ .../opencl/kernel_generator/operation_cl.hpp | 19 ++++++++++--------- .../kernel_generator/optional_broadcast.hpp | 7 ++++--- .../opencl/kernel_generator/reduction_2d.hpp | 6 +++--- .../kernel_generator/rowwise_reduction.hpp | 7 ++++--- stan/math/opencl/kernel_generator/scalar.hpp | 7 ++++--- 17 files changed, 88 insertions(+), 74 deletions(-) diff --git a/stan/math/opencl/kernel_cl.hpp b/stan/math/opencl/kernel_cl.hpp index 3ddb2c88c4a..8f0b6a66e7d 100644 --- a/stan/math/opencl/kernel_cl.hpp +++ b/stan/math/opencl/kernel_cl.hpp @@ -133,9 +133,9 @@ inline tbb::concurrent_vector select_events(K& m) { * @param sources A std::vector of strings containing the code for the kernel. * @param options The values of macros to be passed at compile time. 
*/ -inline auto compile_kernel(const char* name, - const std::vector& sources, - const std::unordered_map& options) { +inline auto compile_kernel( + const char* name, const std::vector& sources, + const std::unordered_map& options) { auto base_opts = opencl_context.base_opts(); for (auto& it : options) { if (base_opts[it.first] > it.second) { diff --git a/stan/math/opencl/kernel_generator/append.hpp b/stan/math/opencl/kernel_generator/append.hpp index 337bc5764b4..c29cbe40284 100644 --- a/stan/math/opencl/kernel_generator/append.hpp +++ b/stan/math/opencl/kernel_generator/append.hpp @@ -129,9 +129,10 @@ class append_row_ : public operation_cl, * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; this->template get_arg<0>().set_args(generated, generated_all, kernel, @@ -291,9 +292,10 @@ class append_col_ : public operation_cl, * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; this->template get_arg<0>().set_args(generated, generated_all, kernel, diff --git a/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp b/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp index 580d45ae906..b2c91b1b0ac 100644 --- a/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp +++ b/stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp @@ -185,9 +185,10 @@ class as_column_vector_or_scalar_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; std::unordered_map generated2; diff --git a/stan/math/opencl/kernel_generator/block_zero_based.hpp b/stan/math/opencl/kernel_generator/block_zero_based.hpp index 7a11f5eb85b..4c0eeaeb2ac 100644 --- a/stan/math/opencl/kernel_generator/block_zero_based.hpp +++ b/stan/math/opencl/kernel_generator/block_zero_based.hpp @@ -226,9 +226,10 @@ class block_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; std::unordered_map generated2; diff --git a/stan/math/opencl/kernel_generator/calc_if.hpp b/stan/math/opencl/kernel_generator/calc_if.hpp index 5903912badc..f9b5ef5ee6c 100644 --- a/stan/math/opencl/kernel_generator/calc_if.hpp +++ b/stan/math/opencl/kernel_generator/calc_if.hpp @@ -67,9 +67,9 @@ class calc_if_ template kernel_parts get_whole_kernel_parts( std::unordered_map& generated, - std::unordered_map& generated_all, name_generator& ng, - const std::string& row_index_name, const std::string& col_index_name, - const T_result& result) const { + std::unordered_map& generated_all, + name_generator& ng, const std::string& row_index_name, + const std::string& col_index_name, const T_result& result) const { if (Do_Calculate) { return this->template get_arg<0>().get_whole_kernel_parts( generated, generated_all, ng, row_index_name, col_index_name, result); @@ -88,9 +88,10 @@ class calc_if_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (Do_Calculate) { this->template get_arg<0>().set_args(generated, generated_all, kernel, arg_num); diff --git a/stan/math/opencl/kernel_generator/check_cl.hpp b/stan/math/opencl/kernel_generator/check_cl.hpp index 4ab50c780b5..85b675b7540 100644 --- a/stan/math/opencl/kernel_generator/check_cl.hpp +++ b/stan/math/opencl/kernel_generator/check_cl.hpp @@ -110,9 +110,10 @@ class check_cl_ : public operation_cl_lhs, bool> { * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { generated[this] = ""; arg_.set_args(generated, generated_all, kernel, arg_num); kernel.setArg(arg_num++, buffer_.buffer()); diff --git a/stan/math/opencl/kernel_generator/colwise_reduction.hpp b/stan/math/opencl/kernel_generator/colwise_reduction.hpp index c553fca7297..e402ad1edeb 100644 --- a/stan/math/opencl/kernel_generator/colwise_reduction.hpp +++ b/stan/math/opencl/kernel_generator/colwise_reduction.hpp @@ -97,9 +97,9 @@ class colwise_reduction template kernel_parts get_whole_kernel_parts( std::unordered_map& generated, - std::unordered_map& generated_all, name_generator& ng, - const std::string& row_index_name, const std::string& col_index_name, - const T_result& result) const { + std::unordered_map& generated_all, + name_generator& ng, const std::string& row_index_name, + const std::string& col_index_name, const T_result& result) const { kernel_parts parts = 
derived().get_kernel_parts( generated, generated_all, ng, row_index_name, col_index_name, false); kernel_parts out_parts = result.get_kernel_parts_lhs( diff --git a/stan/math/opencl/kernel_generator/constant.hpp b/stan/math/opencl/kernel_generator/constant.hpp index 14fb730bc71..078b08fd0f7 100644 --- a/stan/math/opencl/kernel_generator/constant.hpp +++ b/stan/math/opencl/kernel_generator/constant.hpp @@ -80,9 +80,10 @@ class constant_ : public operation_cl, T> { * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; kernel.setArg(arg_num++, a_); diff --git a/stan/math/opencl/kernel_generator/indexing.hpp b/stan/math/opencl/kernel_generator/indexing.hpp index 93aaa5edc36..e997531c19b 100644 --- a/stan/math/opencl/kernel_generator/indexing.hpp +++ b/stan/math/opencl/kernel_generator/indexing.hpp @@ -171,9 +171,10 @@ class indexing_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; this->template get_arg<1>().set_args(generated, generated_all, kernel, diff --git a/stan/math/opencl/kernel_generator/load.hpp b/stan/math/opencl/kernel_generator/load.hpp index da97abbc405..ae547143150 100644 --- a/stan/math/opencl/kernel_generator/load.hpp +++ b/stan/math/opencl/kernel_generator/load.hpp @@ -184,9 +184,10 @@ class load_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated_all.count(&a_) == 0) { generated_all[&a_] = ""; kernel.setArg(arg_num++, a_.buffer()); @@ -315,9 +316,9 @@ class load_ * @param[in,out] id_map map from memory addresses to unique ids * @param[in,out] next_id neqt unique id to use */ - inline void get_unique_matrix_accesses(std::vector& uids, - std::unordered_map& id_map, - int& next_id) const { + inline void get_unique_matrix_accesses( + std::vector& uids, std::unordered_map& id_map, + int& next_id) const { if (id_map.count(&a_) == 0) { id_map[&a_] = next_id; uids.push_back(next_id); diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp index 992f1c3157c..6c1ae3f8b6f 100644 --- a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp +++ b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp @@ -128,8 +128,9 @@ struct multi_result_kernel_internal { */ 
static kernel_parts generate( std::unordered_map& generated, - std::unordered_map& generated_all, name_generator& ng, - const std::string& row_index_name, const std::string& col_index_name, + std::unordered_map& generated_all, + name_generator& ng, const std::string& row_index_name, + const std::string& col_index_name, const std::tuple...>& assignment_pairs) { kernel_parts parts @@ -157,8 +158,8 @@ struct multi_result_kernel_internal { */ static void set_args( std::unordered_map& generated, - std::unordered_map& generated_all, cl::Kernel& kernel, - int& arg_num, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num, const std::tuple...>& assignment_pairs) { next::set_args(generated, generated_all, kernel, arg_num, @@ -221,8 +222,9 @@ struct multi_result_kernel_internal<-1, T_results...> { static kernel_parts generate( std::unordered_map& generated, - std::unordered_map& generated_all, name_generator& ng, - const std::string& row_index_name, const std::string& col_index_name, + std::unordered_map& generated_all, + name_generator& ng, const std::string& row_index_name, + const std::string& col_index_name, const std::tuple...>& assignment_pairs) { return {}; @@ -230,8 +232,8 @@ struct multi_result_kernel_internal<-1, T_results...> { static void set_args( std::unordered_map& generated, - std::unordered_map& generated_all, cl::Kernel& kernel, - int& arg_num, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num, const std::tuple...>& assignment_pairs) {} diff --git a/stan/math/opencl/kernel_generator/opencl_code.hpp b/stan/math/opencl/kernel_generator/opencl_code.hpp index 22464b633b1..ab53950e14b 100644 --- a/stan/math/opencl/kernel_generator/opencl_code.hpp +++ b/stan/math/opencl/kernel_generator/opencl_code.hpp @@ -181,12 +181,11 @@ class opencl_code_ : public operation_cl_base { * @param view_handled whether caller already handled matrix view * @return part of kernel with code for this and nested expressions */ - auto 
get_kernel_parts(std::unordered_map& generated, - std::unordered_map& generated_all, - name_generator& name_gen, - const std::string& row_index_name, - const std::string& col_index_name, - bool view_handled) const { + auto get_kernel_parts( + std::unordered_map& generated, + std::unordered_map& generated_all, + name_generator& name_gen, const std::string& row_index_name, + const std::string& col_index_name, bool view_handled) const { return impl_->get_kernel_parts(generated, generated_all, name_gen, row_index_name, col_index_name, view_handled); diff --git a/stan/math/opencl/kernel_generator/operation_cl.hpp b/stan/math/opencl/kernel_generator/operation_cl.hpp index dbfe0f95b5d..9e2bddc4fdb 100644 --- a/stan/math/opencl/kernel_generator/operation_cl.hpp +++ b/stan/math/opencl/kernel_generator/operation_cl.hpp @@ -194,9 +194,9 @@ class operation_cl : public operation_cl_base { template kernel_parts get_whole_kernel_parts( std::unordered_map& generated, - std::unordered_map& generated_all, name_generator& ng, - const std::string& row_index_name, const std::string& col_index_name, - const T_result& result) const { + std::unordered_map& generated_all, + name_generator& ng, const std::string& row_index_name, + const std::string& col_index_name, const T_result& result) const { kernel_parts parts = derived().get_kernel_parts( generated, generated_all, ng, row_index_name, col_index_name, false); kernel_parts out_parts = result.get_kernel_parts_lhs( @@ -293,9 +293,10 @@ class operation_cl : public operation_cl_base { * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; // parameter pack expansion returns a comma-separated list of values, @@ -433,9 +434,9 @@ class operation_cl : public operation_cl_base { * @param[in,out] id_map map from memory addresses to unique ids * @param[in,out] next_id neqt unique id to use */ - inline void get_unique_matrix_accesses(std::vector& uids, - std::unordered_map& id_map, - int& next_id) const { + inline void get_unique_matrix_accesses( + std::vector& uids, std::unordered_map& id_map, + int& next_id) const { index_apply([&](auto... Is) { static_cast(std::initializer_list{( this->get_arg().get_unique_matrix_accesses(uids, id_map, next_id), diff --git a/stan/math/opencl/kernel_generator/optional_broadcast.hpp b/stan/math/opencl/kernel_generator/optional_broadcast.hpp index 0cd3f7fbf90..01cd3e2528d 100644 --- a/stan/math/opencl/kernel_generator/optional_broadcast.hpp +++ b/stan/math/opencl/kernel_generator/optional_broadcast.hpp @@ -100,9 +100,10 @@ class optional_broadcast_ * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; std::unordered_map generated2; diff --git a/stan/math/opencl/kernel_generator/reduction_2d.hpp b/stan/math/opencl/kernel_generator/reduction_2d.hpp index a7e10bd65c7..33c957ea5f6 100644 --- a/stan/math/opencl/kernel_generator/reduction_2d.hpp +++ b/stan/math/opencl/kernel_generator/reduction_2d.hpp @@ -81,9 +81,9 @@ class reduction_2d template kernel_parts get_whole_kernel_parts( std::unordered_map& generated, - std::unordered_map& generated_all, name_generator& ng, - const std::string& row_index_name, const std::string& col_index_name, - const T_result& result) const { + std::unordered_map& generated_all, + name_generator& ng, const std::string& row_index_name, + const std::string& col_index_name, const T_result& result) const { kernel_parts parts = derived().get_kernel_parts( generated, generated_all, ng, row_index_name, col_index_name, false); kernel_parts out_parts = result.get_kernel_parts_lhs( diff --git a/stan/math/opencl/kernel_generator/rowwise_reduction.hpp b/stan/math/opencl/kernel_generator/rowwise_reduction.hpp index a74dac4434c..0dfdec2d5a4 100644 --- a/stan/math/opencl/kernel_generator/rowwise_reduction.hpp +++ b/stan/math/opencl/kernel_generator/rowwise_reduction.hpp @@ -245,9 +245,10 @@ class rowwise_reduction * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. 
*/ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; std::unordered_map generated2; diff --git a/stan/math/opencl/kernel_generator/scalar.hpp b/stan/math/opencl/kernel_generator/scalar.hpp index fa96025f3c3..34216ea9349 100644 --- a/stan/math/opencl/kernel_generator/scalar.hpp +++ b/stan/math/opencl/kernel_generator/scalar.hpp @@ -72,9 +72,10 @@ class scalar_ : public operation_cl, T> { * @param[in,out] arg_num consecutive number of the first argument to set. * This is incremented for each argument set by this function. */ - inline void set_args(std::unordered_map& generated, - std::unordered_map& generated_all, - cl::Kernel& kernel, int& arg_num) const { + inline void set_args( + std::unordered_map& generated, + std::unordered_map& generated_all, + cl::Kernel& kernel, int& arg_num) const { if (generated.count(this) == 0) { generated[this] = ""; kernel.setArg(arg_num++, a_); From 3e0006ffc40cebc5c9e365374af1c22de39185ab Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Mon, 24 Jul 2023 10:54:34 -0400 Subject: [PATCH 11/14] set default constructor for internal multi kernel --- stan/math/opencl/kernel_generator/multi_result_kernel.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp index 6c1ae3f8b6f..4e2c302dcc7 100644 --- a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp +++ b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp @@ -252,7 +252,7 @@ struct multi_result_kernel_internal<-1, T_results...> { template template std::unordered_map, cl::Kernel> multi_result_kernel_internal< - N, T_results...>::inner::kernel_cache_; + N, 
T_results...>::inner::kernel_cache_{20}; } // namespace internal From 34536216c13a7278cd0d090ba70e36bbf738deb1 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Mon, 24 Jul 2023 11:03:56 -0400 Subject: [PATCH 12/14] revert kernel_cache_ back to a std::map --- stan/math/opencl/kernel_generator/multi_result_kernel.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp index 4e2c302dcc7..f6835486dd5 100644 --- a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp +++ b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp @@ -33,7 +33,7 @@ template struct multi_result_kernel_internal { template struct inner { - static std::unordered_map, cl::Kernel> kernel_cache_; + static std::map, cl::Kernel> kernel_cache_; using next = typename multi_result_kernel_internal< N - 1, T_results...>::template inner; using T_current_result = std::remove_reference_t< @@ -251,8 +251,8 @@ struct multi_result_kernel_internal<-1, T_results...> { template template -std::unordered_map, cl::Kernel> multi_result_kernel_internal< - N, T_results...>::inner::kernel_cache_{20}; +std::map, cl::Kernel> multi_result_kernel_internal< + N, T_results...>::inner::kernel_cache_; } // namespace internal From b5fbe4acf8d6387f7ad6d8705ed45e3c888c0da6 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Mon, 24 Jul 2023 11:37:36 -0400 Subject: [PATCH 13/14] add back map for profiling --- stan/math/prim/err/check_flag_sundials.hpp | 2 +- stan/math/rev/core/profiling.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/stan/math/prim/err/check_flag_sundials.hpp b/stan/math/prim/err/check_flag_sundials.hpp index 85f10781f3f..72f8e9a5c8b 100644 --- a/stan/math/prim/err/check_flag_sundials.hpp +++ b/stan/math/prim/err/check_flag_sundials.hpp @@ -16,7 +16,7 @@ namespace math { /** * Map cvodes error flag to acutally error msg. 
The most frequent - * errors are put at the top. An alternative would be to use std::unordered_map + * errors are put at the top. An alternative would be to use std::map * but in our case the difference would be negligible. Note that we * don't use CVGetReturnFlagName function to retrieve the constant * because sanitizer indicates it contains mem leak. diff --git a/stan/math/rev/core/profiling.hpp b/stan/math/rev/core/profiling.hpp index 57d1758f2fa..ea51aa79499 100644 --- a/stan/math/rev/core/profiling.hpp +++ b/stan/math/rev/core/profiling.hpp @@ -115,7 +115,7 @@ class profile_info { using profile_key = std::pair; -using profile_map = std::unordered_map; +using profile_map = std::map; /** * Profiles C++ lines where the object is in scope. From 650efe90f3784f1869338a2c8868360d04643ac0 Mon Sep 17 00:00:00 2001 From: Dylan Simon Date: Wed, 8 Nov 2023 18:32:14 -0500 Subject: [PATCH 14/14] jenkins: try builtin abortPrevious rather than custom killOldBuilds --- Jenkinsfile | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3039cbd08b1..5250f52d947 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -45,6 +45,7 @@ pipeline { preserveStashes(buildCount: 7) parallelsAlwaysFailFast() buildDiscarder(logRotator(numToKeepStr: '20', daysToKeepStr: '30')) + disableConcurrentBuilds(abortPrevious: true) } environment { STAN_NUM_THREADS = 4 @@ -66,18 +67,6 @@ pipeline { } stages { - stage('Kill previous builds') { - when { - not { branch 'develop' } - not { branch 'master' } - } - steps { - script { - utils.killOldBuilds() - } - } - } - stage("Clang-format") { agent { docker {