stan-dev · rok-cesnovar · Oct 11, 2019 · Sep 20, 2019 · Sep 20, 2019 · Sep 20, 2019
diff --git a/stan/math/opencl/cholesky_decompose.hpp b/stan/math/opencl/cholesky_decompose.hpp
@@ -3,33 +3,34 @@
 #ifdef STAN_OPENCL
 #include <stan/math/opencl/matrix_cl.hpp>
 #include <stan/math/opencl/matrix_cl_view.hpp>
-#include <stan/math/opencl/multiply.hpp>
 #include <stan/math/opencl/multiply_transpose.hpp>
 #include <stan/math/opencl/tri_inverse.hpp>
-#include <stan/math/opencl/subtract.hpp>
 #include <stan/math/opencl/sub_block.hpp>
-#include <stan/math/opencl/transpose.hpp>
 #include <stan/math/opencl/err/check_diagonal_zeros.hpp>
 #include <stan/math/opencl/err/check_nan.hpp>
 #include <stan/math/opencl/err/check_opencl.hpp>
 #include <stan/math/opencl/kernels/cholesky_decompose.hpp>
+#include <stan/math/opencl/prim/multiply.hpp>
+#include <stan/math/opencl/prim/subtract.hpp>
+#include <stan/math/opencl/prim/transpose.hpp>
 #include <stan/math/prim/meta.hpp>
 #include <cl.hpp>
 #include <algorithm>
 #include <cmath>
 
 namespace stan {
 namespace math {
+namespace opencl {
 /**
- * Performs an in-place of the the lower-triangular Cholesky factor (i.e.,
- * matrix square root) of the specified square, symmetric matrix. The return
- * value \f$L\f$ will be a lower-traingular matrix such that the original matrix
- * \f$A\f$ is given by <p>\f$A = L \times L^T\f$. The Cholesky decomposition is
- * computed using an OpenCL kernel. This algorithm is recursive. The matrix is
- * subset into a matrix of size <code>A.rows() / 4</code>, and if the submatrix
- * size is less than 50 or <code>min_block</code> then the cholesky
- * decomposition on the OpenCL device is computed using that submatrix. If the
- * submatrix is greater than 50 or <code>min_block</code> then
+ * Performs an in-place computation of the lower-triangular Cholesky factor
+ * (i.e., matrix square root) of the specified square, symmetric matrix. The
+ * return value \f$L\f$ will be a lower-traingular matrix such that the original
+ * matrix \f$A\f$ is given by <p>\f$A = L \times L^T\f$. The Cholesky
+ * decomposition is computed using an OpenCL kernel. This algorithm is
+ * recursive. The matrix is subset into a matrix of size <code>A.rows() /
+ * 4</code>, and if the submatrix size is less than 50 or <code>min_block</code>
+ * then the cholesky decomposition on the OpenCL device is computed using that
+ * submatrix. If the submatrix is greater than 50 or <code>min_block</code> then
  * <code>cholesky_decompose</code> is run again on a submatrix with size equal
  * to <code>submat.rows() / 4</code>. Once the Cholesky Decomposition is
  * computed, the full matrix cholesky is created by propogating the cholesky
@@ -71,7 +72,7 @@ inline void cholesky_decompose(matrix_cl<T>& A) {
   // The following function either calls the
   // blocked cholesky recursively for the submatrix A_11
   // or calls the kernel  directly if the size of the block is small enough
-  cholesky_decompose(A_11);
+  opencl::cholesky_decompose(A_11);
   // Copies L_11 back to the input matrix
   A.sub_block(A_11, 0, 0, 0, 0, block, block);
 
@@ -87,11 +88,11 @@ inline void cholesky_decompose(matrix_cl<T>& A) {
   // computes A_22 - L_21*(L_21^T)
   matrix_cl<T> L_22 = A_22 - multiply_transpose(L_21);
   // copy L_22 into A's lower left hand corner
-  cholesky_decompose(L_22);
+  opencl::cholesky_decompose(L_22);
   A.sub_block(L_22, 0, 0, block, block, block_subset, block_subset);
   A.view(matrix_cl_view::Lower);
 }
-
+}  // namespace opencl
 }  // namespace math
 }  // namespace stan
 

diff --git a/stan/math/opencl/multiply.hpp b/stan/math/opencl/multiply.hpp
@@ -102,92 +102,7 @@ inline matrix_cl<return_type_t<T1, T2>> multiply(const matrix_cl<T1>& A,
   return temp;
 }
 }  // namespace opencl
-
-/**
- * Multiplies the specified matrix on the OpenCL device
- * with the specified scalar.
- *
- * @param A matrix
- * @param scalar scalar
- * @return matrix multipled with scalar
- */
-template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
-inline matrix_cl<return_type_t<T1, T2>> multiply(const matrix_cl<T1>& A,
-                                                 const T2 scalar) {
-  matrix_cl<return_type_t<T1, T2>> temp(A.rows(), A.cols(), A.view());
-  if (A.size() == 0) {
-    return temp;
-  }
-  try {
-    opencl_kernels::scalar_mul(cl::NDRange(A.rows(), A.cols()), temp, A, scalar,
-                               A.rows(), A.cols(), A.view());
-  } catch (const cl::Error& e) {
-    check_opencl_error("multiply scalar", e);
-  }
-  return temp;
-}
-
-/**
- * Multiplies the specified matrix on the OpenCL device
- * with the specified scalar.
- *
- * @param scalar scalar
- * @param A matrix
- * @return matrix multipled with scalar
- */
-template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
-inline matrix_cl<return_type_t<T1, T2>> multiply(const T1 scalar,
-                                                 const matrix_cl<T2>& A) {
-  return multiply(A, scalar);
-}
-
-/**
- * Computes the product of the specified matrices.
- *
- * Computes the matrix multiplication C[M, K] = A[M, N] x B[N, K]
- *
- * @param A first matrix
- * @param B second matrix
- * @return the product of the first and second matrix
- *
- * @throw <code>std::invalid_argument</code> if the
- *   number of columns in A and rows in B do not match
- */
-template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
-inline matrix_cl<return_type_t<T1, T2>> multiply(const matrix_cl<T1>& A,
-                                                 const matrix_cl<T2>& B) {
-  return opencl::multiply(A, B);
-}
-
-/**
- * Templated product operator for OpenCL matrices.
- *
- * Computes the matrix multiplication C[M, K] = A[M, N] x B[N, K].
- *
- * @param A A matrix or scalar
- * @param B A matrix or scalar
- * @return the product of the first and second arguments
- *
- * @throw <code>std::invalid_argument</code> if the
- *   number of columns in A and rows in B do not match
- */
-template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
-inline matrix_cl<return_type_t<T1, T2>> operator*(const matrix_cl<T1>& A,
-                                                  const matrix_cl<T2>& B) {
-  return opencl::multiply(A, B);
-}
-template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
-inline matrix_cl<return_type_t<T1, T2>> operator*(const matrix_cl<T1>& B,
-                                                  const T2 scalar) {
-  return multiply(B, scalar);
-}
-template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
-inline matrix_cl<return_type_t<T1, T2>> operator*(const T1 scalar,
-                                                  const matrix_cl<T2>& B) {
-  return multiply(scalar, B);
-}
 }  // namespace math
 }  // namespace stan
-
 #endif
 #endif
diff --git a/stan/math/opencl/opencl.hpp b/stan/math/opencl/opencl.hpp
@@ -4,28 +4,40 @@
 
 #include <stan/math/opencl/opencl_context.hpp>
 #include <stan/math/opencl/matrix_cl.hpp>
-#include <stan/math/opencl/add.hpp>
 #include <stan/math/opencl/copy.hpp>
 #include <stan/math/opencl/copy_triangular.hpp>
 #include <stan/math/opencl/cholesky_decompose.hpp>
 #include <stan/math/opencl/diagonal_multiply.hpp>
 #include <stan/math/opencl/identity.hpp>
 #include <stan/math/opencl/is_matrix_cl.hpp>
 #include <stan/math/opencl/tri_inverse.hpp>
-#include <stan/math/opencl/multiply.hpp>
 #include <stan/math/opencl/multiply_transpose.hpp>
 #include <stan/math/opencl/matrix_cl_view.hpp>
 #include <stan/math/opencl/prim/rep_matrix.hpp>
 #include <stan/math/opencl/prim/rep_row_vector.hpp>
 #include <stan/math/opencl/prim/rep_vector.hpp>
-#include <stan/math/opencl/sub_block.hpp>
-#include <stan/math/opencl/subtract.hpp>
 #include <stan/math/opencl/scalar_type.hpp>
+#include <stan/math/opencl/sub_block.hpp>
 #include <stan/math/opencl/triangular_transpose.hpp>
-#include <stan/math/opencl/transpose.hpp>
 #include <stan/math/opencl/value_type.hpp>
 #include <stan/math/opencl/zeros.hpp>
 
+#include <stan/math/opencl/prim/add.hpp>
+#include <stan/math/opencl/prim/bernoulli_logit_glm_lpmf.hpp>
+#include <stan/math/opencl/prim/categorical_logit_glm_lpmf.hpp>
+#include <stan/math/opencl/prim/cholesky_decompose.hpp>
+#include <stan/math/opencl/prim/divide_columns.hpp>
+#include <stan/math/opencl/prim/gp_exp_quad_cov.hpp>
+#include <stan/math/opencl/prim/mdivide_left_tri_low.hpp>
+#include <stan/math/opencl/prim/mdivide_right_tri_low.hpp>
+#include <stan/math/opencl/prim/multiply.hpp>
+#include <stan/math/opencl/prim/neg_binomial_2_log_glm_lpmf.hpp>
+#include <stan/math/opencl/prim/normal_id_glm_lpdf.hpp>
+#include <stan/math/opencl/prim/ordered_logistic_glm_lpmf.hpp>
+#include <stan/math/opencl/prim/poisson_log_glm_lpmf.hpp>
+#include <stan/math/opencl/prim/subtract.hpp>
+#include <stan/math/opencl/prim/transpose.hpp>
+
 #include <stan/math/opencl/err/check_diagonal_zeros.hpp>
 #include <stan/math/opencl/err/check_invalid_matrix_view.hpp>
 #include <stan/math/opencl/err/check_matching_dims.hpp>

diff --git a/stan/math/opencl/add.hpp → stan/math/opencl/prim/add.hpp b/stan/math/opencl/add.hpp → stan/math/opencl/prim/add.hpp
@@ -1,5 +1,5 @@
-#ifndef STAN_MATH_OPENCL_ADD_HPP
-#define STAN_MATH_OPENCL_ADD_HPP
+#ifndef STAN_MATH_OPENCL_PRIM_ADD_HPP
+#define STAN_MATH_OPENCL_PRIM_ADD_HPP
 #ifdef STAN_OPENCL
 #include <stan/math/opencl/matrix_cl.hpp>
 #include <stan/math/opencl/err/check_matching_dims.hpp>

diff --git a/.../math/opencl/bernoulli_logit_glm_lpmf.hpp → .../opencl/prim/bernoulli_logit_glm_lpmf.hpp b/.../math/opencl/bernoulli_logit_glm_lpmf.hpp → .../opencl/prim/bernoulli_logit_glm_lpmf.hpp
@@ -1,5 +1,5 @@
-#ifndef STAN_MATH_OPENCL_BERNOULLI_LOGIT_GLM_LPMF_HPP
-#define STAN_MATH_OPENCL_BERNOULLI_LOGIT_GLM_LPMF_HPP
+#ifndef STAN_MATH_OPENCL_PRIM_BERNOULLI_LOGIT_GLM_LPMF_HPP
+#define STAN_MATH_OPENCL_PRIM_BERNOULLI_LOGIT_GLM_LPMF_HPP
 #ifdef STAN_OPENCL
 
 #include <stan/math/prim/mat/fun/Eigen.hpp>
@@ -9,6 +9,7 @@
 #include <stan/math/prim/scal/err/check_finite.hpp>
 #include <stan/math/prim/scal/fun/constants.hpp>
 #include <stan/math/prim/mat/fun/value_of_rec.hpp>
+#include <stan/math/prim/mat/fun/sum.hpp>
 #include <stan/math/prim/arr/fun/value_of_rec.hpp>
 #include <stan/math/prim/scal/fun/size_zero.hpp>
 

diff --git a/...ath/opencl/categorical_logit_glm_lpmf.hpp → ...pencl/prim/categorical_logit_glm_lpmf.hpp b/...ath/opencl/categorical_logit_glm_lpmf.hpp → ...pencl/prim/categorical_logit_glm_lpmf.hpp
@@ -1,11 +1,12 @@
-#ifndef STAN_MATH_OPENCL_CATEGORICAL_LOGIT_GLM_LPMF_HPP
-#define STAN_MATH_OPENCL_CATEGORICAL_LOGIT_GLM_LPMF_HPP
+#ifndef STAN_MATH_OPENCL_PRIM_CATEGORICAL_LOGIT_GLM_LPMF_HPP
+#define STAN_MATH_OPENCL_PRIM_CATEGORICAL_LOGIT_GLM_LPMF_HPP
 #ifdef STAN_OPENCL
 
 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/scal/err/check_bounded.hpp>
 #include <stan/math/prim/scal/err/check_consistent_size.hpp>
 #include <stan/math/prim/scal/fun/size_zero.hpp>
+#include <stan/math/prim/mat/fun/sum.hpp>
 #include <Eigen/Core>
 
 #include <stan/math/opencl/matrix_cl.hpp>

diff --git a/stan/math/opencl/prim/cholesky_decompose.hpp b/stan/math/opencl/prim/cholesky_decompose.hpp
@@ -0,0 +1,43 @@
+#ifndef STAN_MATH_OPENCL_PRIM_CHOLESKY_DECOMPOSE_HPP
+#define STAN_MATH_OPENCL_PRIM_CHOLESKY_DECOMPOSE_HPP
+#ifdef STAN_OPENCL
+#include <stan/math/opencl/matrix_cl.hpp>
+#include <stan/math/opencl/cholesky_decompose.hpp>
+#include <stan/math/opencl/copy_triangular.hpp>
+#include <stan/math/prim/meta.hpp>
+#include <cl.hpp>
+#include <algorithm>
+#include <cmath>
+
+namespace stan {
+namespace math {
+/**
+ * Returns the lower-triangular Cholesky factor (i.e., matrix
+ * square root) of the specified square, symmetric matrix on the OpenCL device.
+ * The return value \f$L\f$ will be a lower-traingular matrix such that the
+ * original matrix \f$A\f$ is given by <p>\f$A = L \times L^T\f$.
+ * @param A Input square matrix
+ * @return Square root of matrix.
+ * @throw std::domain_error if m is not a symmetric matrix or
+ *   if m is not positive definite (if m has more than 0 elements)
+ */
+template <typename T, typename = require_floating_point_t<T>>
+inline matrix_cl<T> cholesky_decompose(matrix_cl<T>& A) {
+  check_square("cholesky_decompose", "A", A);
+  check_symmetric("cholesky_decompose", "A", A);
+  matrix_cl<T> res = copy_cl(A);
+  if (res.rows() == 0) {
+    return res;
+  }
+  opencl::cholesky_decompose(res);
+  // check_pos_definite on matrix_cl is check_nan + check_diagonal_zeros
+  check_nan("cholesky_decompose (OpenCL)", "A", res);
+  check_diagonal_zeros("cholesky_decompose (OpenCL)", "A", res);
+  res.template zeros_strict_tri<matrix_cl_view::Upper>();
+  return res;
+}
+}  // namespace math
+}  // namespace stan
+
+#endif
+#endif
diff --git a/stan/math/opencl/divide_columns.hpp → stan/math/opencl/prim/divide_columns.hpp b/stan/math/opencl/divide_columns.hpp → stan/math/opencl/prim/divide_columns.hpp
@@ -1,5 +1,5 @@
-#ifndef STAN_MATH_OPENCL_DIVIDE_COLUMNS_HPP
-#define STAN_MATH_OPENCL_DIVIDE_COLUMNS_HPP
+#ifndef STAN_MATH_OPENCL_PRIM_DIVIDE_COLUMNS_HPP
+#define STAN_MATH_OPENCL_PRIM_DIVIDE_COLUMNS_HPP
 #ifdef STAN_OPENCL
 #include <stan/math/opencl/matrix_cl.hpp>
 #include <stan/math/opencl/kernels/divide_columns.hpp>

diff --git a/stan/math/opencl/gp_exp_quad_cov.hpp → stan/math/opencl/prim/gp_exp_quad_cov.hpp b/stan/math/opencl/gp_exp_quad_cov.hpp → stan/math/opencl/prim/gp_exp_quad_cov.hpp
@@ -1,11 +1,11 @@
-#ifndef STAN_MATH_OPENCL_GP_EXP_QUAD_COV_HPP
-#define STAN_MATH_OPENCL_GP_EXP_QUAD_COV_HPP
+#ifndef STAN_MATH_OPENCL_PRIM_GP_EXP_QUAD_COV_HPP
+#define STAN_MATH_OPENCL_PRIM_GP_EXP_QUAD_COV_HPP
 #ifdef STAN_OPENCL
-
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/scal/fun/square.hpp>
 #include <stan/math/opencl/matrix_cl.hpp>
 #include <stan/math/opencl/kernels/gp_exp_quad_cov.hpp>
 #include <stan/math/opencl/err/check_matching_dims.hpp>
-#include <stan/math/prim/meta.hpp>
 #include <cl.hpp>
 
 namespace stan {

diff --git a/stan/math/opencl/prim/mdivide_left_tri_low.hpp b/stan/math/opencl/prim/mdivide_left_tri_low.hpp
@@ -0,0 +1,49 @@
+#ifndef STAN_MATH_OPENCL_PRIM_MDIVIDE_LEFT_TRI_LOW_HPP
+#define STAN_MATH_OPENCL_PRIM_MDIVIDE_LEFT_TRI_LOW_HPP
+#ifdef STAN_OPENCL
+#include <stan/math/prim/mat/err/check_square.hpp>
+#include <stan/math/prim/mat/err/check_multiplicable.hpp>
+#include <stan/math/opencl/matrix_cl.hpp>
+#include <stan/math/opencl/multiply.hpp>
+#include <stan/math/opencl/tri_inverse.hpp>
+namespace stan {
+namespace math {
+
+/**
+ * Returns the solution of the system Ax=b when A is lower triangular.
+ * @tparam T1 type of elements in A
+ * @tparam T2 type of elements in b
+ * @param A Triangular matrix.
+ * @param b Right hand side matrix or vector.
+ * @return x = A^-1 b, solution of the linear system.
+ * @throws std::domain_error if A is not square or the rows of b don't
+ * match the size of A.
+ */
+template <typename T1, typename T2,
+          typename = require_all_floating_point_t<T1, T2>>
+inline matrix_cl<return_type_t<T1, T2>> mdivide_left_tri_low(
+    const matrix_cl<T1>& A, const matrix_cl<T2>& b) {
+  check_square("mdivide_left_tri_low", "A", A);
+  check_multiplicable("mdivide_left_tri_low", "A", A, "b", b);
+  return tri_inverse<matrix_cl_view::Lower>(A) * b;
+}
+
+/**
+ * Returns the solution of the system Ax=b when A is triangular and b=I.
+ * @tparam T type of elements in A
+ * @tparam R1 number of rows in A
+ * @tparam C1 number of columns in A
+ * @param A Triangular matrix.
+ * @return x = A^-1 .
+ * @throws std::domain_error if A is not square
+ */
+template <typename T, typename = require_all_floating_point_t<T>>
+inline matrix_cl<T> mdivide_left_tri_low(const matrix_cl<T>& A) {
+  check_square("mdivide_left_tri_low", "A", A);
+  return tri_inverse<matrix_cl_view::Lower>(A);
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
+#endif
diff --git a/stan/math/opencl/prim/mdivide_right_tri_low.hpp b/stan/math/opencl/prim/mdivide_right_tri_low.hpp
@@ -0,0 +1,33 @@
+#ifndef STAN_MATH_OPENCL_PRIM_MDIVIDE_RIGHT_TRI_LOW_HPP
+#define STAN_MATH_OPENCL_PRIM_MDIVIDE_RIGHT_TRI_LOW_HPP
+#ifdef STAN_OPENCL
+#include <stan/math/prim/mat/err/check_square.hpp>
+#include <stan/math/prim/mat/err/check_multiplicable.hpp>
+#include <stan/math/opencl/matrix_cl.hpp>
+#include <stan/math/opencl/multiply.hpp>
+#include <stan/math/opencl/tri_inverse.hpp>
+namespace stan {
+namespace math {
+
+/**
+ * Returns the solution of the system Ax=b where A is a
+ * lower triangular matrix.
+ * @param A Matrix.
+ * @param b Right hand side matrix or vector.
+ * @return x = b * tri(A)^-1, solution of the linear system.
+ * @throws std::domain_error if A is not square or the rows of b don't
+ * match the size of A.
+ */
+template <typename T1, typename T2,
+          typename = require_all_floating_point_t<T1, T2>>
+inline matrix_cl<return_type_t<T1, T2>> mdivide_right_tri_low(
+    const matrix_cl<T2>& b, const matrix_cl<T1>& A) {
+  check_square("mdivide_right_tri_low (OpenCL)", "A", A);
+  check_multiplicable("mdivide_right_tri_low (OpenCL)", "b", b, "A", A);
+  return b * tri_inverse<matrix_cl_view::Lower>(A);
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
+#endif