Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix assignment for nullptr var_value<matrix> and for assigning expressions #2978

Merged
merged 14 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions stan/math/opencl/matrix_cl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,37 @@ class matrix_cl : public matrix_cl_base {
*/
~matrix_cl() { wait_for_read_write_events(); }

/**
* Set the values of a `matrix_cl` to zero.
*/
void setZero() {
if (this->size() == 0) {
return;
}
cl::Event zero_event;
const std::size_t write_events_size = this->write_events().size();
const std::size_t read_events_size = this->read_events().size();
const std::size_t read_write_size = write_events_size + read_events_size;
std::vector<cl::Event> read_write_events(read_write_size, cl::Event{});
auto&& read_events_vec = this->read_events();
auto&& write_events_vec = this->write_events();
for (std::size_t i = 0; i < read_events_size; ++i) {
read_write_events[i] = read_events_vec[i];
}
for (std::size_t i = read_events_size, j = 0; j < write_events_size;
++i, ++j) {
read_write_events[i] = write_events_vec[j];
}
try {
opencl_context.queue().enqueueFillBuffer(buffer_cl_, static_cast<T>(0), 0,
sizeof(T) * this->size(),
&read_write_events, &zero_event);
} catch (const cl::Error& e) {
check_opencl_error("setZero", e);
}
this->add_write_event(zero_event);
}

private:
/**
* Initializes the OpenCL buffer of this matrix by copying the data from given
Expand Down
9 changes: 9 additions & 0 deletions stan/math/rev/core/arena_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,15 @@ class arena_matrix : public Eigen::Map<MatrixType> {
Base::operator=(a);
return *this;
}
/**
* Forces hard copying matrices into an arena matrix
* @tparam T Any type assignable to `Base`
* @param x the values to write to `this`
*/
template <typename T>
void deep_copy(const T& x) {
Base::operator=(x);
}
};

} // namespace math
Expand Down
62 changes: 56 additions & 6 deletions stan/math/rev/core/var.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1020,9 +1020,10 @@ class var_value<T, internal::require_matrix_var_value<T>> {
* @param other the value to assign
* @return this
*/
template <typename S, require_assignable_t<value_type, S>* = nullptr,
require_all_plain_type_t<T, S>* = nullptr,
require_not_same_t<plain_type_t<T>, plain_type_t<S>>* = nullptr>
template <typename S, typename T_ = T,
require_assignable_t<value_type, S>* = nullptr,
require_all_plain_type_t<T_, S>* = nullptr,
require_not_same_t<plain_type_t<T_>, plain_type_t<S>>* = nullptr>
inline var_value<T>& operator=(const var_value<S>& other) {
static_assert(
EIGEN_PREDICATE_SAME_MATRIX_SIZE(T, S),
Expand All @@ -1032,16 +1033,65 @@ class var_value<T, internal::require_matrix_var_value<T>> {
}

/**
* Assignment of another var value, when either this or the other one does not
* Assignment of another var value, when the `this` does not
* contain a plain type.
* @tparam S type of the value in the `var_value` to assing
* @tparam S type of the value in the `var_value` to assign
* @param other the value to assign
* @return this
*/
template <typename S, typename T_ = T,
require_assignable_t<value_type, S>* = nullptr,
require_not_plain_type_t<S>* = nullptr,
require_plain_type_t<T_>* = nullptr>
inline var_value<T>& operator=(const var_value<S>& other) {
// If vi_ is nullptr then the var needs initialized via copy constructor
if (!(this->vi_)) {
*this = var_value<T>(other);
return *this;
}
arena_t<plain_type_t<T>> prev_val(vi_->val_.rows(), vi_->val_.cols());
prev_val.deep_copy(vi_->val_);
vi_->val_.deep_copy(other.val());
// no need to change any adjoints - these are just zeros before the reverse
// pass

reverse_pass_callback(
[this_vi = this->vi_, other_vi = other.vi_, prev_val]() mutable {
this_vi->val_.deep_copy(prev_val);

// we have no way of detecting aliasing between this->vi_->adj_ and
// other.vi_->adj_, so we must copy adjoint before reseting to zero

// we can reuse prev_val instead of allocating a new matrix
prev_val.deep_copy(this_vi->adj_);
this_vi->adj_.setZero();
other_vi->adj_ += prev_val;
});
return *this;
}
/**
* Assignment of another var value, when either both `this` or other does not
* contain a plain type.
* @note Here we do not need to use `deep_copy` as the `var_value`'s
* inner `vari_type` holds a view which will call the assignment operator
* that does not perform a placement new.
* @tparam S type of the value in the `var_value` to assign
* @param other the value to assign
* @return this
*/
template <typename S, typename T_ = T,
require_assignable_t<value_type, S>* = nullptr,
require_any_not_plain_type_t<T_, S>* = nullptr>
require_not_plain_type_t<T_>* = nullptr>
inline var_value<T>& operator=(const var_value<S>& other) {
// If vi_ is nullptr then the var needs initialized via copy constructor
if (!(this->vi_)) {
[]() STAN_COLD_PATH {
throw std::domain_error(
"var_value<matrix>::operator=(var_value<expression>):"
" Internal Bug! Please report this with an example"
" of your model to the Stan math github repository.");
}();
}
arena_t<plain_type_t<T>> prev_val = vi_->val_;
vi_->val_ = other.val();
// no need to change any adjoints - these are just zeros before the reverse
Expand Down
14 changes: 14 additions & 0 deletions test/unit/math/opencl/matrix_cl_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,18 @@ TEST(MathMatrixCL, assignment) {
EXPECT_EQ(nullptr, mat1_cl.buffer()());
}

TEST(MathMatrixCL, setZeroFun) {
using stan::math::matrix_cl;
Eigen::Matrix<double, 2, 2> mat_1;
mat_1 << 1, 2, 3, 4;
matrix_cl<double> mat1_cl(mat_1);
mat1_cl.setZero();
Eigen::Matrix<double, 2, 2> mat_1_fromcl
= stan::math::from_matrix_cl(mat1_cl);
EXPECT_EQ(mat_1_fromcl(0), 0);
EXPECT_EQ(mat_1_fromcl(1), 0);
EXPECT_EQ(mat_1_fromcl(2), 0);
EXPECT_EQ(mat_1_fromcl(3), 0);
}

#endif
84 changes: 84 additions & 0 deletions test/unit/math/rev/core/var_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,3 +910,87 @@ TEST_F(AgradRev, matrix_compile_time_conversions) {
EXPECT_MATRIX_FLOAT_EQ(colvec.val(), rowvec.val());
EXPECT_MATRIX_FLOAT_EQ(x11.val(), rowvec.val());
}

TEST_F(AgradRev, assign_nan_varmat) {
using stan::math::var_value;
using var_vector = var_value<Eigen::Matrix<double, -1, 1>>;
using stan::math::var;
Eigen::VectorXd x_val(10);
for (int i = 0; i < 10; ++i) {
x_val(i) = i + 0.1;
}
var_vector x(x_val);
var_vector y = var_vector(Eigen::Matrix<double, -1, 1>::Constant(
10, std::numeric_limits<double>::quiet_NaN()));
y = stan::math::head(x, 10);
var sigma = 1.0;
var lp = stan::math::normal_lpdf<false>(y, 0, sigma);
lp.grad();
Eigen::VectorXd x_ans_adj(10);
for (int i = 0; i < 10; ++i) {
x_ans_adj(i) = -(i + 0.1);
}
EXPECT_MATRIX_EQ(x.adj(), x_ans_adj);
Eigen::VectorXd y_ans_adj = Eigen::VectorXd::Zero(10);
EXPECT_MATRIX_EQ(y_ans_adj, y.adj());
}

TEST_F(AgradRev, assign_nan_matvar) {
using stan::math::var;
using var_vector = Eigen::Matrix<var, -1, 1>;
Eigen::VectorXd x_val(10);
for (int i = 0; i < 10; ++i) {
x_val(i) = i + 0.1;
}
var_vector x(x_val);
var_vector y = var_vector(Eigen::Matrix<double, -1, 1>::Constant(
10, std::numeric_limits<double>::quiet_NaN()));
// need to store y's previous vari pointers
var_vector z = y;
y = stan::math::head(x, 10);
var sigma = 1.0;
var lp = stan::math::normal_lpdf<false>(y, 0, sigma);
lp.grad();
Eigen::VectorXd x_ans_adj(10);
for (int i = 0; i < 10; ++i) {
x_ans_adj(i) = -(i + 0.1);
}
EXPECT_MATRIX_EQ(x.adj(), x_ans_adj);
Eigen::VectorXd z_ans_adj = Eigen::VectorXd::Zero(10);
EXPECT_MATRIX_EQ(z_ans_adj, z.adj());
}

/**
* For var<Matrix> and Matrix<var>, we need to make sure
* the tape, when going through reverse mode, leads to the same outcomes.
* In the case where we declare a var<Matrix> without initializing it, aka
* `var_value<Eigen::MatrixXd>`, we need to think about what the equivalent
* behavior is for `Eigen::Matrix<var, -1, -1>`.
* When default constructing `Eigen::Matrix<var, -1, -1>` we would have an array
* of `var` types with `nullptr` as the vari. The first assignment to that array
* would then just copy the vari pointer from the other array. This is the
* behavior we want to mimic for `var_value<Eigen::MatrixXd>`. So in this test
* show that for uninitialized `var_value<Eigen::MatrixXd>`, we can assign it
* and the adjoints are the same as x.
*/
TEST_F(AgradRev, assign_nullptr_var) {
using stan::math::var_value;
using var_vector = var_value<Eigen::Matrix<double, -1, 1>>;
using stan::math::var;
Eigen::VectorXd x_val(10);
for (int i = 0; i < 10; ++i) {
x_val(i) = i + 0.1;
}
var_vector x(x_val);
var_vector y;
y = stan::math::head(x, 10);
var sigma = 1.0;
var lp = stan::math::normal_lpdf<false>(y, 0, sigma);
lp.grad();
Eigen::VectorXd x_ans_adj(10);
for (int i = 0; i < 10; ++i) {
x_ans_adj(i) = -(i + 0.1);
}
EXPECT_MATRIX_EQ(x.adj(), x_ans_adj);
EXPECT_MATRIX_EQ(x_ans_adj, y.adj());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you a comment just stating that this is intentionally different from the above test and why (e.g., that y did not exist before and therefore does not have its own memory which can be set to 0)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I also wrote another test here to show that the assign_nan test has the same behavior for both kinds of matrices

}