Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reorganize /opencl and add missing matrix_cl overloads #1364

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f60a88d
moved add and subtract
rok-cesnovar Sep 20, 2019
077bec2
moved multiply
rok-cesnovar Sep 20, 2019
29146a0
Merge branch 'develop' into refactor/match-opencl-function-signatures
rok-cesnovar Sep 20, 2019
798f661
fix header path
rok-cesnovar Sep 21, 2019
912c05c
fixed assignment bug
rok-cesnovar Sep 21, 2019
88ee0c4
moved divide_columns and gp_exp_quad_cov
rok-cesnovar Sep 21, 2019
a2464ac
moved transpose and multiply, inplace cholesky is now in opencl:: nam…
rok-cesnovar Sep 21, 2019
b1cd81d
cholesky moved
rok-cesnovar Sep 21, 2019
0b4d1ea
doxy for multiply cleanup
rok-cesnovar Sep 21, 2019
0e084a2
Merge branch 'develop' into refactor/opencl-function-signatures-and-l…
rok-cesnovar Sep 21, 2019
b36103f
clang format
rok-cesnovar Sep 21, 2019
de53951
doxygen fix
rok-cesnovar Sep 21, 2019
72f88a1
fix include path
rok-cesnovar Sep 21, 2019
40be0f0
simplify prim/mat/cholesky call
rok-cesnovar Sep 21, 2019
675df84
moving mdivide_left_tri
rok-cesnovar Sep 25, 2019
4e5927d
Merge branch 'develop' into refactor/opencl-function-signatures-and-l…
rok-cesnovar Sep 28, 2019
522d83b
[Jenkins] auto-formatting by clang-format version 5.0.0-3~16.04.1 (ta…
stan-buildbot Sep 28, 2019
7a8f326
added mdivide_right_tri_low and mdivide_left_tri_low
rok-cesnovar Sep 28, 2019
b416f74
removed the .hpp test file
rok-cesnovar Sep 28, 2019
af84a9e
added comments and fixed typos
rok-cesnovar Sep 28, 2019
aca4aca
move GLMs
rok-cesnovar Sep 28, 2019
3c57a14
cleaned up headers and added guards
rok-cesnovar Sep 28, 2019
d097a9e
added newlines to files
rok-cesnovar Sep 28, 2019
d4d52ff
Merge branch 'develop' into refactor/opencl-function-signatures-and-l…
rok-cesnovar Sep 29, 2019
75c83f6
added missing PRIM in guards, moved new GLMs, added missing sum include
rok-cesnovar Sep 29, 2019
197977f
fixed header
rok-cesnovar Sep 29, 2019
d69b112
Merge branch 'develop' into refactor/opencl-function-signatures-and-l…
rok-cesnovar Oct 10, 2019
9bc190e
added new requires, resolved review comments
rok-cesnovar Oct 10, 2019
fdd62f2
returned back 2 headers
rok-cesnovar Oct 10, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions stan/math/opencl/cholesky_decompose.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,34 @@
#ifdef STAN_OPENCL
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/opencl/matrix_cl_view.hpp>
#include <stan/math/opencl/multiply.hpp>
#include <stan/math/opencl/multiply_transpose.hpp>
#include <stan/math/opencl/tri_inverse.hpp>
#include <stan/math/opencl/subtract.hpp>
#include <stan/math/opencl/sub_block.hpp>
#include <stan/math/opencl/transpose.hpp>
#include <stan/math/opencl/err/check_diagonal_zeros.hpp>
#include <stan/math/opencl/err/check_nan.hpp>
#include <stan/math/opencl/err/check_opencl.hpp>
#include <stan/math/opencl/kernels/cholesky_decompose.hpp>
#include <stan/math/opencl/prim/multiply.hpp>
#include <stan/math/opencl/prim/subtract.hpp>
#include <stan/math/opencl/prim/transpose.hpp>
#include <stan/math/prim/meta.hpp>
#include <cl.hpp>
#include <algorithm>
#include <cmath>

namespace stan {
namespace math {
namespace opencl {
/**
* Performs an in-place of the the lower-triangular Cholesky factor (i.e.,
* matrix square root) of the specified square, symmetric matrix. The return
* value \f$L\f$ will be a lower-traingular matrix such that the original matrix
* \f$A\f$ is given by <p>\f$A = L \times L^T\f$. The Cholesky decomposition is
* computed using an OpenCL kernel. This algorithm is recursive. The matrix is
* subset into a matrix of size <code>A.rows() / 4</code>, and if the submatrix
* size is less than 50 or <code>min_block</code> then the cholesky
* decomposition on the OpenCL device is computed using that submatrix. If the
* submatrix is greater than 50 or <code>min_block</code> then
* Performs an in-place computation of the lower-triangular Cholesky factor
* (i.e., matrix square root) of the specified square, symmetric matrix. The
* return value \f$L\f$ will be a lower-traingular matrix such that the original
* matrix \f$A\f$ is given by <p>\f$A = L \times L^T\f$. The Cholesky
* decomposition is computed using an OpenCL kernel. This algorithm is
* recursive. The matrix is subset into a matrix of size <code>A.rows() /
* 4</code>, and if the submatrix size is less than 50 or <code>min_block</code>
* then the cholesky decomposition on the OpenCL device is computed using that
* submatrix. If the submatrix is greater than 50 or <code>min_block</code> then
* <code>cholesky_decompose</code> is run again on a submatrix with size equal
* to <code>submat.rows() / 4</code>. Once the Cholesky Decomposition is
* computed, the full matrix cholesky is created by propogating the cholesky
Expand Down Expand Up @@ -71,7 +72,7 @@ inline void cholesky_decompose(matrix_cl<T>& A) {
// The following function either calls the
// blocked cholesky recursively for the submatrix A_11
// or calls the kernel directly if the size of the block is small enough
cholesky_decompose(A_11);
opencl::cholesky_decompose(A_11);
// Copies L_11 back to the input matrix
A.sub_block(A_11, 0, 0, 0, 0, block, block);

Expand All @@ -87,11 +88,11 @@ inline void cholesky_decompose(matrix_cl<T>& A) {
// computes A_22 - L_21*(L_21^T)
matrix_cl<T> L_22 = A_22 - multiply_transpose(L_21);
// copy L_22 into A's lower left hand corner
cholesky_decompose(L_22);
opencl::cholesky_decompose(L_22);
A.sub_block(L_22, 0, 0, block, block, block_subset, block_subset);
A.view(matrix_cl_view::Lower);
}

} // namespace opencl
} // namespace math
} // namespace stan

Expand Down
85 changes: 0 additions & 85 deletions stan/math/opencl/multiply.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,92 +102,7 @@ inline matrix_cl<return_type_t<T1, T2>> multiply(const matrix_cl<T1>& A,
return temp;
}
} // namespace opencl

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This section was move to /prim. The actual implementation that was already in opencl:: was left in /opencl.

/**
* Multiplies the specified matrix on the OpenCL device
* with the specified scalar.
*
* @param A matrix
* @param scalar scalar
* @return matrix multipled with scalar
*/
template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
inline matrix_cl<return_type_t<T1, T2>> multiply(const matrix_cl<T1>& A,
const T2 scalar) {
matrix_cl<return_type_t<T1, T2>> temp(A.rows(), A.cols(), A.view());
if (A.size() == 0) {
return temp;
}
try {
opencl_kernels::scalar_mul(cl::NDRange(A.rows(), A.cols()), temp, A, scalar,
A.rows(), A.cols(), A.view());
} catch (const cl::Error& e) {
check_opencl_error("multiply scalar", e);
}
return temp;
}

/**
* Multiplies the specified matrix on the OpenCL device
* with the specified scalar.
*
* @param scalar scalar
* @param A matrix
* @return matrix multipled with scalar
*/
template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
inline matrix_cl<return_type_t<T1, T2>> multiply(const T1 scalar,
const matrix_cl<T2>& A) {
return multiply(A, scalar);
}

/**
* Computes the product of the specified matrices.
*
* Computes the matrix multiplication C[M, K] = A[M, N] x B[N, K]
*
* @param A first matrix
* @param B second matrix
* @return the product of the first and second matrix
*
* @throw <code>std::invalid_argument</code> if the
* number of columns in A and rows in B do not match
*/
template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
inline matrix_cl<return_type_t<T1, T2>> multiply(const matrix_cl<T1>& A,
const matrix_cl<T2>& B) {
return opencl::multiply(A, B);
}

/**
* Templated product operator for OpenCL matrices.
*
* Computes the matrix multiplication C[M, K] = A[M, N] x B[N, K].
*
* @param A A matrix or scalar
* @param B A matrix or scalar
* @return the product of the first and second arguments
*
* @throw <code>std::invalid_argument</code> if the
* number of columns in A and rows in B do not match
*/
template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
inline matrix_cl<return_type_t<T1, T2>> operator*(const matrix_cl<T1>& A,
const matrix_cl<T2>& B) {
return opencl::multiply(A, B);
}
template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
inline matrix_cl<return_type_t<T1, T2>> operator*(const matrix_cl<T1>& B,
const T2 scalar) {
return multiply(B, scalar);
}
template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
inline matrix_cl<return_type_t<T1, T2>> operator*(const T1 scalar,
const matrix_cl<T2>& B) {
return multiply(scalar, B);
}
} // namespace math
} // namespace stan

#endif
#endif
22 changes: 17 additions & 5 deletions stan/math/opencl/opencl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,40 @@

#include <stan/math/opencl/opencl_context.hpp>
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/opencl/add.hpp>
#include <stan/math/opencl/copy.hpp>
#include <stan/math/opencl/copy_triangular.hpp>
#include <stan/math/opencl/cholesky_decompose.hpp>
#include <stan/math/opencl/diagonal_multiply.hpp>
#include <stan/math/opencl/identity.hpp>
#include <stan/math/opencl/is_matrix_cl.hpp>
#include <stan/math/opencl/tri_inverse.hpp>
#include <stan/math/opencl/multiply.hpp>
#include <stan/math/opencl/multiply_transpose.hpp>
#include <stan/math/opencl/matrix_cl_view.hpp>
#include <stan/math/opencl/prim/rep_matrix.hpp>
#include <stan/math/opencl/prim/rep_row_vector.hpp>
#include <stan/math/opencl/prim/rep_vector.hpp>
#include <stan/math/opencl/sub_block.hpp>
#include <stan/math/opencl/subtract.hpp>
#include <stan/math/opencl/scalar_type.hpp>
#include <stan/math/opencl/sub_block.hpp>
#include <stan/math/opencl/triangular_transpose.hpp>
#include <stan/math/opencl/transpose.hpp>
#include <stan/math/opencl/value_type.hpp>
#include <stan/math/opencl/zeros.hpp>

#include <stan/math/opencl/prim/add.hpp>
#include <stan/math/opencl/prim/bernoulli_logit_glm_lpmf.hpp>
#include <stan/math/opencl/prim/categorical_logit_glm_lpmf.hpp>
#include <stan/math/opencl/prim/cholesky_decompose.hpp>
#include <stan/math/opencl/prim/divide_columns.hpp>
#include <stan/math/opencl/prim/gp_exp_quad_cov.hpp>
#include <stan/math/opencl/prim/mdivide_left_tri_low.hpp>
#include <stan/math/opencl/prim/mdivide_right_tri_low.hpp>
#include <stan/math/opencl/prim/multiply.hpp>
#include <stan/math/opencl/prim/neg_binomial_2_log_glm_lpmf.hpp>
#include <stan/math/opencl/prim/normal_id_glm_lpdf.hpp>
#include <stan/math/opencl/prim/ordered_logistic_glm_lpmf.hpp>
#include <stan/math/opencl/prim/poisson_log_glm_lpmf.hpp>
#include <stan/math/opencl/prim/subtract.hpp>
#include <stan/math/opencl/prim/transpose.hpp>

#include <stan/math/opencl/err/check_diagonal_zeros.hpp>
#include <stan/math/opencl/err/check_invalid_matrix_view.hpp>
#include <stan/math/opencl/err/check_matching_dims.hpp>
Expand Down
4 changes: 2 additions & 2 deletions stan/math/opencl/add.hpp → stan/math/opencl/prim/add.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef STAN_MATH_OPENCL_ADD_HPP
#define STAN_MATH_OPENCL_ADD_HPP
#ifndef STAN_MATH_OPENCL_PRIM_ADD_HPP
#define STAN_MATH_OPENCL_PRIM_ADD_HPP
#ifdef STAN_OPENCL
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/opencl/err/check_matching_dims.hpp>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef STAN_MATH_OPENCL_BERNOULLI_LOGIT_GLM_LPMF_HPP
#define STAN_MATH_OPENCL_BERNOULLI_LOGIT_GLM_LPMF_HPP
#ifndef STAN_MATH_OPENCL_PRIM_BERNOULLI_LOGIT_GLM_LPMF_HPP
#define STAN_MATH_OPENCL_PRIM_BERNOULLI_LOGIT_GLM_LPMF_HPP
#ifdef STAN_OPENCL

#include <stan/math/prim/mat/fun/Eigen.hpp>
Expand All @@ -9,6 +9,7 @@
#include <stan/math/prim/scal/err/check_finite.hpp>
#include <stan/math/prim/scal/fun/constants.hpp>
#include <stan/math/prim/mat/fun/value_of_rec.hpp>
#include <stan/math/prim/mat/fun/sum.hpp>
#include <stan/math/prim/arr/fun/value_of_rec.hpp>
#include <stan/math/prim/scal/fun/size_zero.hpp>

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#ifndef STAN_MATH_OPENCL_CATEGORICAL_LOGIT_GLM_LPMF_HPP
#define STAN_MATH_OPENCL_CATEGORICAL_LOGIT_GLM_LPMF_HPP
#ifndef STAN_MATH_OPENCL_PRIM_CATEGORICAL_LOGIT_GLM_LPMF_HPP
#define STAN_MATH_OPENCL_PRIM_CATEGORICAL_LOGIT_GLM_LPMF_HPP
#ifdef STAN_OPENCL

#include <stan/math/prim/meta.hpp>
#include <stan/math/prim/scal/err/check_bounded.hpp>
#include <stan/math/prim/scal/err/check_consistent_size.hpp>
#include <stan/math/prim/scal/fun/size_zero.hpp>
#include <stan/math/prim/mat/fun/sum.hpp>
#include <Eigen/Core>

#include <stan/math/opencl/matrix_cl.hpp>
Expand Down
43 changes: 43 additions & 0 deletions stan/math/opencl/prim/cholesky_decompose.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#ifndef STAN_MATH_OPENCL_PRIM_CHOLESKY_DECOMPOSE_HPP
#define STAN_MATH_OPENCL_PRIM_CHOLESKY_DECOMPOSE_HPP
#ifdef STAN_OPENCL
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/opencl/cholesky_decompose.hpp>
#include <stan/math/opencl/copy_triangular.hpp>
#include <stan/math/prim/meta.hpp>
#include <cl.hpp>
#include <algorithm>
#include <cmath>

namespace stan {
namespace math {
/**
* Returns the lower-triangular Cholesky factor (i.e., matrix
* square root) of the specified square, symmetric matrix on the OpenCL device.
* The return value \f$L\f$ will be a lower-traingular matrix such that the
* original matrix \f$A\f$ is given by <p>\f$A = L \times L^T\f$.
* @param A Input square matrix
* @return Square root of matrix.
* @throw std::domain_error if m is not a symmetric matrix or
* if m is not positive definite (if m has more than 0 elements)
*/
template <typename T, typename = require_floating_point_t<T>>
inline matrix_cl<T> cholesky_decompose(matrix_cl<T>& A) {
check_square("cholesky_decompose", "A", A);
check_symmetric("cholesky_decompose", "A", A);
matrix_cl<T> res = copy_cl(A);
if (res.rows() == 0) {
return res;
}
opencl::cholesky_decompose(res);
// check_pos_definite on matrix_cl is check_nan + check_diagonal_zeros
check_nan("cholesky_decompose (OpenCL)", "A", res);
check_diagonal_zeros("cholesky_decompose (OpenCL)", "A", res);
res.template zeros_strict_tri<matrix_cl_view::Upper>();
return res;
}
} // namespace math
} // namespace stan

#endif
#endif
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef STAN_MATH_OPENCL_DIVIDE_COLUMNS_HPP
#define STAN_MATH_OPENCL_DIVIDE_COLUMNS_HPP
#ifndef STAN_MATH_OPENCL_PRIM_DIVIDE_COLUMNS_HPP
#define STAN_MATH_OPENCL_PRIM_DIVIDE_COLUMNS_HPP
#ifdef STAN_OPENCL
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/opencl/kernels/divide_columns.hpp>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#ifndef STAN_MATH_OPENCL_GP_EXP_QUAD_COV_HPP
#define STAN_MATH_OPENCL_GP_EXP_QUAD_COV_HPP
#ifndef STAN_MATH_OPENCL_PRIM_GP_EXP_QUAD_COV_HPP
#define STAN_MATH_OPENCL_PRIM_GP_EXP_QUAD_COV_HPP
#ifdef STAN_OPENCL

#include <stan/math/prim/meta.hpp>
#include <stan/math/prim/scal/fun/square.hpp>
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/opencl/kernels/gp_exp_quad_cov.hpp>
#include <stan/math/opencl/err/check_matching_dims.hpp>
#include <stan/math/prim/meta.hpp>
#include <cl.hpp>

namespace stan {
Expand Down
49 changes: 49 additions & 0 deletions stan/math/opencl/prim/mdivide_left_tri_low.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#ifndef STAN_MATH_OPENCL_PRIM_MDIVIDE_LEFT_TRI_LOW_HPP
#define STAN_MATH_OPENCL_PRIM_MDIVIDE_LEFT_TRI_LOW_HPP
#ifdef STAN_OPENCL
#include <stan/math/prim/mat/err/check_square.hpp>
#include <stan/math/prim/mat/err/check_multiplicable.hpp>
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/opencl/multiply.hpp>
#include <stan/math/opencl/tri_inverse.hpp>
namespace stan {
namespace math {

/**
* Returns the solution of the system Ax=b when A is lower triangular.
* @tparam T1 type of elements in A
* @tparam T2 type of elements in b
* @param A Triangular matrix.
* @param b Right hand side matrix or vector.
* @return x = A^-1 b, solution of the linear system.
* @throws std::domain_error if A is not square or the rows of b don't
* match the size of A.
*/
template <typename T1, typename T2,
typename = require_all_floating_point_t<T1, T2>>
inline matrix_cl<return_type_t<T1, T2>> mdivide_left_tri_low(
const matrix_cl<T1>& A, const matrix_cl<T2>& b) {
check_square("mdivide_left_tri_low", "A", A);
check_multiplicable("mdivide_left_tri_low", "A", A, "b", b);
return tri_inverse<matrix_cl_view::Lower>(A) * b;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In order to force the Lower I had to extend the tri_inverse with a template.

The other option would be to force a change of A before calling tri_inverse but that changes the view for A globally, which is bad.

The third option would be to have the "forced" view as an argument to tri_inverse. I am also fine with that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the option you chose.

}

/**
* Returns the solution of the system Ax=b when A is triangular and b=I.
* @tparam T type of elements in A
* @tparam R1 number of rows in A
* @tparam C1 number of columns in A
* @param A Triangular matrix.
* @return x = A^-1 .
* @throws std::domain_error if A is not square
*/
template <typename T, typename = require_all_floating_point_t<T>>
inline matrix_cl<T> mdivide_left_tri_low(const matrix_cl<T>& A) {
check_square("mdivide_left_tri_low", "A", A);
return tri_inverse<matrix_cl_view::Lower>(A);
}

} // namespace math
} // namespace stan
#endif
#endif
33 changes: 33 additions & 0 deletions stan/math/opencl/prim/mdivide_right_tri_low.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#ifndef STAN_MATH_OPENCL_PRIM_MDIVIDE_RIGHT_TRI_LOW_HPP
#define STAN_MATH_OPENCL_PRIM_MDIVIDE_RIGHT_TRI_LOW_HPP
#ifdef STAN_OPENCL
#include <stan/math/prim/mat/err/check_square.hpp>
#include <stan/math/prim/mat/err/check_multiplicable.hpp>
#include <stan/math/opencl/matrix_cl.hpp>
#include <stan/math/opencl/multiply.hpp>
#include <stan/math/opencl/tri_inverse.hpp>
namespace stan {
namespace math {

/**
* Returns the solution of the system Ax=b where A is a
* lower triangular matrix.
* @param A Matrix.
* @param b Right hand side matrix or vector.
* @return x = b * tri(A)^-1, solution of the linear system.
* @throws std::domain_error if A is not square or the rows of b don't
* match the size of A.
*/
template <typename T1, typename T2,
typename = require_all_floating_point_t<T1, T2>>
inline matrix_cl<return_type_t<T1, T2>> mdivide_right_tri_low(
const matrix_cl<T2>& b, const matrix_cl<T1>& A) {
check_square("mdivide_right_tri_low (OpenCL)", "A", A);
check_multiplicable("mdivide_right_tri_low (OpenCL)", "b", b, "A", A);
return b * tri_inverse<matrix_cl_view::Lower>(A);
}

} // namespace math
} // namespace stan
#endif
#endif
Loading