diff --git a/src/stan/variational/families/normal_lowrank.hpp b/src/stan/variational/families/normal_lowrank.hpp new file mode 100644 index 00000000000..3ddbfd0074c --- /dev/null +++ b/src/stan/variational/families/normal_lowrank.hpp @@ -0,0 +1,213 @@ +#ifndef STAN_VARIATIONAL_NORMAL_LOWRANK_HPP +#define STAN_VARIATIONAL_NORMAL_LOWRANK_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace stan { + namespace variational { + class normal_lowrank : public base_family { + private: + Eigen::VectorXd mu_; + Eigen::MatrixXd B_; + Eigen::VectorXd d_; + + const int dimension_; + const int rank_; + + void validate_mean(const char* function, + const Eigen::VectorXd& mu) { + stan::math::check_not_nan(function, "Mean vector", mu); + stan::math::check_size_match(function, + "Dimension of input vector", mu.size(), + "Dimension of current vector", dimension()); + } + + void validate_factor(const char* function, + const Eigen::MatrixXd& B) { + stan::math::check_not_nan(function, "Low rank factor", B); + stan::math::check_size_match(function, + "Dimension of factor", B.rows(), + "Dimension of approximation", dimension()); + stan::math::check_size_match(function, + "Rank of factor", B.cols(), + "Rank of approximation", rank()); + } + + void validate_noise(const char *function, + const Eigen::VectorXd& d) { + stan::math::check_not_nan(function, "Noise vector", d); + stan::math::check_size_match(function, + "Dimension of noise vector", d.size(), + "Dimension of approximation", dimension()); + } + + public: + explicit normal_lowrank(size_t dimension, size_t rank) + : mu_(Eigen::VectorXd::Zero(dimension)), + B_(Eigen::MatrixXd::Zero(dimension, rank)), + d_(Eigen::VectorXd::Zero(dimension)), + dimension_(dimension), + rank_(rank) { + } + + explicit normal_lowrank(const Eigen::VectorXd& mu, + const Eigen::MatrixXd& B, + const Eigen::VectorXd& d) + : mu_(mu), B_(B), d_(d), dimension_(mu.size()), rank_(B.cols()) { + static const char* function = "stan::variational::normal_lowrank"; + validate_mean(function, mu); + validate_factor(function, B); + validate_noise(function, d); + } + + int dimension() const { return dimension_; } + int rank() const { return rank_; } + + const Eigen::VectorXd& mean() const { return mu(); } + const Eigen::VectorXd& mu() const { return mu_; } + const Eigen::MatrixXd& B() const { return B_; } + const Eigen::VectorXd& d() const { return d_; } + + void set_mu(const Eigen::VectorXd& mu) { + static const char* function = "stan::variational::set_mu"; + validate_mean(function, mu); + mu_ = mu; + } + + void set_B(const Eigen::MatrixXd& B) { + static const char* function = "stan::variational::set_B"; + validate_factor(function, B); + B_ = B; + } + + void set_d(const Eigen::VectorXd& d) { + static const char* function = "stan::variational::set_d"; + validate_noise(function, d); + d_ = d; + } + + void set_to_zero() { + mu_ = Eigen::VectorXd::Zero(dimension()); + B_ = Eigen::MatrixXd::Zero(dimension(), rank()); + d_ = Eigen::VectorXd::Zero(dimension()); + } + + double entropy() const { + static int r = rank(); + static double mult = 0.5 * (1.0 + stan::math::LOG_TWO_PI); + double result = mult * dimension(); + result += 0.5 * log((Eigen::MatrixXd::Identity(r, r) + + B_.transpose() * + d_.array().square().matrix().asDiagonal().inverse() * + B_).determinant()); + for (int d = 0; d < dimension(); ++d) { + result += log(d_(d)); + } + return result; + } + + Eigen::VectorXd transform(const Eigen::VectorXd& eta) const { + static const char* function = + "stan::variational::normal_lowrank::transform"; + stan::math::check_size_match(function, + "Dimension of input vector", eta.size(), + "Sum of dimension and rank", dimension() + rank()); + stan::math::check_not_nan(function, "Input vector", eta); + Eigen::VectorXd z = eta.head(rank()); + Eigen::VectorXd eps = eta.tail(dimension()); + return (d_.cwiseProduct(eps)) + (B_ * z) + mu_; + } + + template + void calc_grad(normal_lowrank& elbo_grad, + M& m, + Eigen::VectorXd& cont_params, + int n_monte_carlo_grad, + BaseRNG& rng, + callbacks::logger& logger) const { + static const char* function = + "stan::variational::normal_lowrank::calc_grad"; + + Eigen::VectorXd mu_grad = Eigen::VectorXd::Zero(dimension()); + Eigen::MatrixXd B_grad = Eigen::MatrixXd::Zero(dimension(), rank()); + Eigen::VectorXd d_grad = Eigen::VectorXd::Zero(dimension()); + + double tmp_lp = 0.0; + Eigen::VectorXd tmp_mu_grad = Eigen::VectorXd::Zero(dimension()); + Eigen::VectorXd eta = Eigen::VectorXd::Zero(dimension() + rank()); + Eigen::VectorXd zeta = Eigen::VectorXd::Zero(dimension()); + + Eigen::MatrixXd inv_noise = + d_.array().square().matrix().asDiagonal().inverse(); + Eigen::VectorXd var_grad_left = inv_noise - inv_noise * B_ * + (Eigen::MatrixXd::Identity(rank(), rank()) + + B_.transpose() * inv_noise * B_).inverse() + * B_.transpose() * inv_noise; + Eigen::VectorXd tmp_var_grad_left = Eigen::VectorXd::Zero(dimension()); + + // Naive Monte Carlo integration + static const int n_retries = 10; + for (int i = 0, n_monte_carlo_drop = 0; i < n_monte_carlo_grad; ) { + // Draw from standard normal and transform to real-coordinate space + for (int d = 0; d < dimension() + rank(); ++d) { + eta(d) = stan::math::normal_rng(0, 1, rng); + } + Eigen::VectorXd z = eta.head(rank()); + Eigen::VectorXd eps = eta.tail(dimension()); + zeta = transform(eta); + try { + std::stringstream ss; + stan::model::gradient(m, zeta, tmp_lp, tmp_mu_grad, &ss); + if (ss.str().length() > 0) + logger.info(ss); + stan::math::check_finite(function, "Gradient of mu", tmp_mu_grad); + + mu_grad += tmp_mu_grad; + tmp_var_grad_left = tmp_mu_grad + var_grad_left * (zeta - mu); + for (int ii = 0; ii < dimension(); ++ii) { + for (int jj = 0; jj <= ii && jj < rank(); ++jj) { + B_grad(ii, jj) += tmp_var_grad_left(ii) * z(jj); + } + d_grad(ii) += tmp_var_grad_left(ii) * eps(ii); + } + ++i; + } catch (const std::exception& e) { + ++n_monte_carlo_drop; + if (n_monte_carlo_drop >= n_retries * n_monte_carlo_grad) { + const char* name = "The number of dropped evaluations"; + const char* msg1 = "has reached its maximum amount ("; + int y = n_retries * n_monte_carlo_grad; + const char* msg2 = "). Your model may be either severely " + "ill-conditioned or misspecified."; + stan::math::domain_error(function, name, y, msg1, msg2); + } + } + } + mu_grad /= static_cast(n_monte_carlo_grad); + B_grad /= static_cast(n_monte_carlo_grad); + d_grad /= static_cast(n_monte_carlo_grad); + + elbo_grad.set_mu(mu_grad); + elbo_grad.set_B(B_grad); + elbo_grad.set_d(d_grad); + } + + double calc_log_g(const Eigen::VectorXd& eta) const { + double log_g = 0; + for (int d = 0; d < rank() + dimension(); ++d) { + log_g += -stan::math::square(eta(d)) * 0.5; + } + return log_g; + } + }; + } +} + +#endif + diff --git a/src/test/unit/variational/families/normal_lowrank_test.cpp b/src/test/unit/variational/families/normal_lowrank_test.cpp new file mode 100644 index 00000000000..c255f1fff34 --- /dev/null +++ b/src/test/unit/variational/families/normal_lowrank_test.cpp @@ -0,0 +1,196 @@ +#include +#include +#include +#include + +TEST(normal_lowrank_test, zero_init) { + int my_dimension = 10; + int my_rank = 3; + + stan::variational::normal_lowrank my_normal_lowrank(my_dimension, my_rank); + EXPECT_FLOAT_EQ(my_dimension, my_normal_lowrank.dimension()); + EXPECT_FLOAT_EQ(my_rank, my_normal_lowrank.rank()); + + const Eigen::VectorXd& mu_out = my_normal_lowrank.mu(); + const Eigen::MatrixXd& B_out = my_normal_lowrank.B(); + const Eigen::MatrixXd& d_out = my_normal_lowrank.d(); + + for (int i = 0; i < my_dimension; ++i) { + EXPECT_FLOAT_EQ(0.0, mu_out(i)); + EXPECT_FLOAT_EQ(0.0, d_out(i)); + for (int j = 0; j < my_rank; ++j) { + EXPECT_FLOAT_EQ(0.0, B_out(i,j)); + } + } +} + +TEST(normal_lowrank_test, dimension_and_rank) { + Eigen::VectorXd mu(4); + mu << 5.7, -3.2, 0.1332, -1.87; + + Eigen::MatrixXd B(4, 3); + B << 1.3, 0, 0, + 2.3, 41, 0, + 3.3, 42, 92, + 4.3, 43, 93; + + Eigen::VectorXd d(4); + d << 0.63, 0.94, 1.32, 1.18; + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, d); + + EXPECT_FLOAT_EQ(mu.size(), my_normal_lowrank.dimension()); + EXPECT_FLOAT_EQ(B.cols(), my_normal_lowrank.rank()); +} + +TEST(normal_lowrank_test, mean_vector) { + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3, 2); + B << 1.3, 0, + 2.3, 41, + 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 1.32; + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, d); + + const Eigen::Vector3d& mu_out = my_normal_lowrank.mu(); + + for (int i = 0; i < my_normal_lowrank.dimension(); ++i) + EXPECT_FLOAT_EQ(mu(i), mu_out(i)); + + double nan = std::numeric_limits::quiet_NaN(); + Eigen::Vector3d mu_nan = Eigen::VectorXd::Constant(3, nan); + + EXPECT_THROW(stan::variational::normal_lowrank my_normal_lowrank_nan(mu_nan, B, d);, + std::domain_error); + EXPECT_THROW(my_normal_lowrank.set_mu(mu_nan);, + std::domain_error); + Eigen::MatrixXd B_nan = Eigen::MatrixXd::Constant(3,3,nan); + EXPECT_THROW(stan::variational::normal_lowrank my_normal_lowrank_nan(mu, B_nan, d);, + std::domain_error); + + my_normal_lowrank.set_to_zero(); + const Eigen::Vector3d& mu_out_zero = my_normal_lowrank.mu(); + + for (int i = 0; i < my_normal_lowrank.dimension(); ++i) { + EXPECT_FLOAT_EQ(0.0, mu_out_zero(i)); + } +} + +TEST(normal_lowrank_test, lowrank_factor) { + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3, 2); + B << 1.3, 0, + 2.3, 41, + 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 1.32; + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, d); + + const Eigen::MatrixXd& B_out = my_normal_lowrank.B(); + + for (int j = 0, nRows = B.rows(), nCols = B.cols(); j < nCols; ++j) { + for (int i = 0; i < nRows; ++i) { + EXPECT_FLOAT_EQ(B(i, j), B_out(i, j)); + } + } + + double nan = std::numeric_limits::quiet_NaN(); + Eigen::MatrixXd B_nan = Eigen::MatrixXd::Constant(3,2,nan); + EXPECT_THROW(my_normal_lowrank.set_B(B_nan);, + std::domain_error); + my_normal_lowrank.set_to_zero(); + const Eigen::MatrixXd& B_out_zero = my_normal_lowrank.B(); + + for (int i = 0; i < my_normal_lowrank.dimension(); ++i) { + for (int j = 0; j < my_normal_lowrank.rank(); ++j) { + EXPECT_FLOAT_EQ(0.0, B_out_zero(i,j)); + } + } +} + +TEST(normal_lowrank_test, entropy) { + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3,2); + B << 1.3, 0, + 2.3, 41, + 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 1.32; + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, d); + + double entropy_true = 8.86050306582748; + + const double entropy_out = my_normal_lowrank.entropy(); + + EXPECT_FLOAT_EQ(entropy_out, entropy_true); +} + +TEST(normal_lowrank_test, transform) { + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3,2); + B << 1.3, 0, + 2.3, 41, + 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 0.1332; + + Eigen::VectorXd x(5); + x << 7.1, -9.2, 0.59, 0.24, -1.92; + + Eigen::Vector3d x_transformed; + x_transformed << 15.3017, -363.8444, -363.092544; + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, d); + + Eigen::Vector3d x_result; + x_result = my_normal_lowrank.transform(x); + + for (int i = 0; i < my_normal_lowrank.dimension(); ++i) + EXPECT_FLOAT_EQ(x_result(i), x_transformed(i)); + + double nan = std::numeric_limits::quiet_NaN(); + Eigen::VectorXd x_nan = Eigen::VectorXd::Constant(5, nan); + EXPECT_THROW(my_normal_lowrank.transform(x_nan);, + std::domain_error); +} + +TEST(normal_lowrank_test, calc_log_g) { + Eigen::VectorXd x(5); + x << 7.1, -9.2, 0.59, 0.24, -1.92; + + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3,2); + B << 1.3, 0, + 2.3, 41, + 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 0.1332; + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, d); + + double log_g_true = -69.57104999999999; + + const double log_g_out = my_normal_lowrank.calc_log_g(x); + + EXPECT_FLOAT_EQ(log_g_out, log_g_true); +} +