Skip to content

Commit

Permalink
Merge pull request #74 from andrjohns/feature/copula_refactor
Browse files Browse the repository at this point in the history
Mixed Copula Refactor
  • Loading branch information
spinkney authored Mar 12, 2022
2 parents 76f94ce + 119d9a4 commit 2c06a47
Show file tree
Hide file tree
Showing 6 changed files with 327 additions and 479 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
library(cmdstanr)
# setwd("~\helpful_stan_functions")
fp <- file.path("./examples/copula/stan/mixed_copula_example.stan")
mod <- cmdstan_model(fp, include_paths = c("./functions/copula",
"./functions/distribution"))
fp <- file.path("./examples/copula/stan/centered_gaussian_copula_example.stan")
mod <- cmdstan_model(fp, include_paths = c("./functions",
"./functions/copula",
"./functions/distribution"))

## obtain non-extreme random correlation matrix
Gamma = matrix(1, 3, 3)
Expand Down Expand Up @@ -37,27 +38,27 @@ family.list = list(
)

#' Joint analysis of multivariate GLMs of mixed types
#'
#'
#' Uses rstan to estimate a multivariate GLM with correlated
#' responses via Gaussian copula
#'
#'
#' @param formula.list a list of formulas
#' @param family.list a list of families giving the density of each formula. Canonical link functions always used.
#' @param data data.frame giving all variables in `formula.list`
#' @param ... arguments to pass onto `rstan::sampling`
#'
#'
#' @return rstan object fitting the posterior
get_data = function(formula.list, family.list, data) {

N = nrow(data)
J = length(formula.list)

## reorder variables if necessary
n.indx = sapply(family.list, function(x) x$family) == 'gaussian'
b.indx = sapply(family.list, function(x) x$family) == 'binomial'
p.indx = sapply(family.list, function(x) x$family) == 'poisson'
new.order = c(which(n.indx), which(b.indx), which(p.indx))

## gtotal number of endpoints and matrix of each outcome type
Jn = sum(n.indx)
Jb = sum(b.indx)
Expand All @@ -66,26 +67,26 @@ get_data = function(formula.list, family.list, data) {
Yn = Y[, n.indx, drop = F]
Yb = Y[, b.indx, drop = F]
Yp = Y[, p.indx, drop = F]

## store character vector giving dist name and number so results make sense
distnames = c( paste0('gaussian[', 1:Jn, ']'),
paste0('binomial[', 1:Jb, ']'),
paste0('poisson[', 1:Jp, ']')
)


if ( !all( new.order == 1:J ) ) {
message("Rearranging order of dependent variables to normal, binomial, poisson")
formula.list = formula.list[new.order]
family.list = family.list[new.order]
}

## get design matrices and number of covariates per regression
Xlist = lapply(formula.list, model.matrix)
Xbig = do.call(cbind, Xlist)
K = ncol(Xbig)
Kj = sapply(Xlist, ncol)

## obtain matrix giving start and end values of x matrix per endpoint
Xindx = matrix(nrow = J, ncol = 2)
xstart = 1
Expand All @@ -94,7 +95,7 @@ get_data = function(formula.list, family.list, data) {
Xindx[j, ] = c(xstart, xend)
xstart = xend + 1
}

## construct stan data
standat = list(
'N' = N,
Expand All @@ -110,27 +111,9 @@ get_data = function(formula.list, family.list, data) {
'Xindx' = Xindx,
'Kj' = Kj
)

## fit stan model
# fit = rstan::sampling(stanmod, data = standat, ...)
# fit = stanmod$sample(data = standat,
# parallel_chains = 4,
# iter_warmup = 1000,
# iter_sampling = 1000)
# ## rename betas for clarity about which equation each comes from, e.g., beta[j][0:(Kj-1)]
# beta.old.indx = which(grepl('beta', names(fit)))
# beta.new = character()
# for( j in 1:J ) {
# beta.new = c(beta.new, paste0(distnames[j], '[', seq(0,Kj[j] - 1), ']') )
# }
# names(fit)[beta.old.indx] = beta.new
#
## return stan object
return(standat)
}



stan_data <- get_data(formula.list[1:3], family.list[1:3], data)
stan_data[["J"]] <- rep(1, 3)

Expand All @@ -140,9 +123,8 @@ stan_data[["special"]] <- 1
# stan_data[["special"]] <- 0
mod_out <- mod$sample(data = stan_data,
seed = 908453,
# init = 0 ,
init = 0 ,
chains = 2,
parallel_chains = 2,
iter_warmup = 400,
iter_sampling = 400)

57 changes: 57 additions & 0 deletions examples/copula/stan/centered_gaussian_copula_example.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
functions {
#include centered_gaussian_copula.stanfunctions
}
data {
int<lower=0> N; // number of observations
array[3] int<lower=0> J; // total number of outcomes
int<lower=0> K; // number of covariates for concatenated design matrices (X1, .., XK)
matrix[N, J[1]] Yn; // normal outcomes
array[N, J[2]] int<lower=0, upper=1> Yb; // bernoulli outcomes
array[N, J[3]] int<lower=0> Yp; // poisson outcomes
matrix[N, K] X; // concatenated design matrices (X1, ..., XK)
array[3] int<lower=0> Kj; // J-dim integer array giving how many covariates per outcome
int<lower=0> special;
}
transformed data {
int J_all = sum(J);
// Create separate design matrices for each outcome type
matrix[N, Kj[1]] Xn = X[ : , 1:Kj[1]];
matrix[N, Kj[2]] Xb = X[ : , (Kj[1] + 1):(Kj[1] + Kj[2])];
matrix[N, Kj[3]] Xp = X[ : , (Kj[2] + 1):(Kj[2] + Kj[3])];
vector[J_all] mu_zero = rep_vector(0, J_all);
}
parameters {
vector[Kj[1]] beta_n; // Vector of normal regression coefficients
vector[Kj[2]] beta_b; // Vector of bernoulli regression coefficients
vector[Kj[3]] beta_p; // Vector of poisson regression coefficients
cholesky_factor_corr[J_all] L; // Cholesky decomposition of JxJ correlation matrix
vector<lower=0>[J[1]] sigmasq; // Jn-dim vector of variances (may be 0-dim)
matrix<lower=0, upper=1>[N, J[2]] uraw_b; // latent variables for bernoulli outcomes
matrix<lower=0, upper=1>[N, J[3]] uraw_p; // latent variables for Poisson outcomes
}
transformed parameters {
}
model {
// initialize variables
vector[J[1]] sigma = sqrt(sigmasq); // stdev
// Calculate the means for each regression
matrix[N, J[1]] mu_n = Xn * rep_matrix(beta_n, J[1]);
matrix[N, J[2]] mu_b = Xb * rep_matrix(beta_b, J[2]);
matrix[N, J[3]] mu_p = Xp * rep_matrix(beta_p, J[3]);

sigmasq ~ inv_gamma(1.0e-4, 1.0e-4);
// priors for regression coefficients
beta_n ~ normal(0.0, 10.0);
beta_b ~ normal(0.0, 10.0);
beta_p ~ normal(0.0, 10.0);
L ~ lkj_corr_cholesky(1.0);

// Increment LL for copula
{ normal_marginal(Yn, mu_n, sigma),
bernoulli_marginal(Yb, mu_b, uraw_b),
poisson_marginal(Yp, mu_p, uraw_p) } ~ centered_gaussian_copula_cholesky(L);

}
generated quantities {
corr_matrix[J_all] Gamma = multiply_lower_tri_self_transpose(L);
}
66 changes: 0 additions & 66 deletions examples/copula/stan/mixed_copula_example.stan

This file was deleted.

Loading

0 comments on commit 2c06a47

Please sign in to comment.