From 29ea9c02009ca8051965648aa2d5f8ee1b1c7e5f Mon Sep 17 00:00:00 2001 From: "@stangandaho" Date: Tue, 7 May 2024 01:13:11 +0100 Subject: [PATCH] Coverage --- R/auxiliary_functions.R | 369 ------------------ R/auxiliary_graf.R | 838 ---------------------------------------- 2 files changed, 1207 deletions(-) diff --git a/R/auxiliary_functions.R b/R/auxiliary_functions.R index 26063f1..033189f 100644 --- a/R/auxiliary_functions.R +++ b/R/auxiliary_functions.R @@ -5,195 +5,6 @@ ## %######################################################%## -#' pre_tr_te -#' -#' @noRd -pre_tr_te <- function(data, p_names, h) { - train <- list() - test <- list() - - if (any(c("train", "train-test", "test") - %in% - unique(data[, p_names[h]]))) { - np2 <- 1 - - filt <- grepl("train", data[, p_names[h]]) - train[[1]] <- data[filt, ] %>% - dplyr::select(-p_names[!p_names == p_names[h]]) - - filt <- grepl("test", data[, p_names[h]]) - test[[1]] <- data[filt, ] %>% - dplyr::select(-p_names[!p_names == p_names[h]]) - } else { - np2 <- max(data[p_names[h]]) - - for (i in 1:np2) { - train[[i]] <- data[data[p_names[h]] != i, ] %>% - dplyr::select(-p_names[!p_names == p_names[h]]) - - test[[i]] <- data[data[p_names[h]] == i, ] %>% - dplyr::select(-p_names[!p_names == p_names[h]]) - } - } - return(list(train = train, test = test, np2 = np2)) -} - - - -# Inverse bioclim -#' -#' @noRd -#' -bio <- function(data, env_layer) { - . <- NULL - if (class(data)[1] != "data.frame") { - data <- data.frame(data) - } - if (!methods::is(env_layer, "SpatRaster")) { - env_layer <- terra::rast(env_layer) - } - - data <- na.omit(data) - - result <- env_layer[[1]] - result[] <- NA - - minv <- apply(data, 2, min) - maxv <- apply(data, 2, max) - vnames <- names(data) - - data_2 <- data %>% - na.omit() %>% - apply(., 2, sort) %>% - data.frame() - - rnk <- function(x, y) { - b <- apply(y, 1, FUN = function(z) sum(x < z)) - t <- apply(y, 1, FUN = function(z) sum(x == z)) - r <- (b + 0.5 * t) / length(x) - i <- which(r > 0.5) - r[i] <- 1 - r[i] - r * 2 - } - - var_df <- terra::as.data.frame(env_layer) - var_df <- na.omit(var_df) - - k <- (apply(t(var_df) >= minv, 2, all) & - apply(t(var_df) <= maxv, 2, all)) - - for (j in vnames) { - var_df[k, j] <- rnk( - data_2[, j], - var_df[k, j, drop = FALSE] - ) - } - var_df[!k, ] <- 0 - res <- apply(var_df, 1, min) - result[as.numeric(names(res))] <- res - return(result) -} - -inv_bio <- function(e, p) { - if (!methods::is(e, "SpatRaster")) { - e <- terra::rast(e) - } - r <- bio(data = terra::extract(e, p)[-1], env_layer = e) - r <- (r - terra::minmax(r)[1]) / - (terra::minmax(r)[2] - terra::minmax(r)[1]) - r <- r <= 0.01 # environmental constrain - r[which(r[, ] == FALSE)] <- NA - return(r) -} - - -#' Inverse geo -#' -#' @noRd -#' -inv_geo <- function(e, p, d) { - colnames(p) <- c("x", "y") - p <- terra::vect(p, geom = c("x", "y"), crs = terra::crs(e)) - b <- terra::buffer(p, width = d) - b <- terra::rasterize(b, e, background = 0) - e <- terra::mask(e, b, maskvalues = 1) - return(e) -} - -#' Boyce -#' -#' @description This function calculate Boyce index performance metric. Codes were adapted from -#' enmSdm package. 
-#' -#' @noRd -boyce <- function(pres, - contrast, - n_bins = 101, - n_width = 0.1) { - lowest <- min(c(pres, contrast), na.rm = TRUE) - highest <- max(c(pres, contrast), na.rm = TRUE) + .Machine$double.eps - window_width <- n_width * (highest - lowest) - - lows <- seq(lowest, highest - window_width, length.out = n_bins) - highs <- seq(lowest + window_width + .Machine$double.eps, highest, length.out = n_bins) - - ## initiate variables to store predicted/expected (P/E) values - freq_pres <- NA - freq_contrast <- NA - - # tally proportion of test presences/background in each class - for (i in 1:n_bins) { - # number of presence predictions in a class - freq_pres[i] <- - sum(pres >= lows[i] & pres < highs[i], na.rm = TRUE) - - # number of background predictions in this class - freq_contrast[i] <- - sum(contrast >= lows[i] & contrast < highs[i], na.rm = TRUE) - } - - # mean bin prediction - mean_pred <- rowMeans(cbind(lows, highs)) - - # add small number to each bin that has 0 background frequency but does have a presence frequency > 0 - if (any(freq_pres > 0 & freq_contrast == 0)) { - small_value <- 0.5 - freq_contrast[freq_pres > 0 & freq_contrast == 0] <- small_value - } - - # remove classes with 0 presence frequency - if (any(freq_pres == 0)) { - zeros <- which(freq_pres == 0) - mean_pred[zeros] <- NA - freq_pres[zeros] <- NA - freq_contrast[zeros] <- NA - } - - # remove classes with 0 background frequency - if (any(0 %in% freq_contrast)) { - zeros <- which(freq_pres == 0) - mean_pred[zeros] <- NA - freq_pres[zeros] <- NA - freq_contrast[zeros] <- NA - } - - P <- freq_pres / length(pres) - E <- freq_contrast / length(contrast) - PE <- P / E - - # remove NAs - rm_nas <- stats::complete.cases(data.frame(mean_pred, PE)) - # mean_pred <- mean_pred[rm_nas] - # PE <- PE[rm_nas] - - # calculate Boyce index - result <- stats::cor( - x = ifelse(is.na(mean_pred), 0, mean_pred), - y = ifelse(is.na(PE), 0, PE), method = "spearman" - ) - return(result) -} - # maxnet:::predict.maxnet() #' Predict maxnet #' @importFrom stats model.matrix @@ -243,183 +54,3 @@ predict_maxnet <- function(object, newdata, clamp = TRUE, type = c("link", "expo return(1/(1 + exp(-object$entropy - link))) } } - -#' Outliers with Reverse Jackknife -#' -#' @noRd -#' -rev_jack <- function(v) { - v2 <- v - v <- unique(v) - lgh <- length(v) - 1 - t1 <- (0.95 * sqrt(length(v))) + 0.2 - x <- sort(v) - y <- rep(0, lgh) - for (i in seq_len(lgh)) { - x1 <- x[i + 1] - if (x[i] < mean(v)) { - y[i] <- (x1 - x[i]) * (mean(v) - x[i]) - } else { - y[i] <- (x1 - x[i]) * (x1 - mean(v)) - } - } - my <- mean(y) - z <- y / (sqrt(sum((y - my)^2) / lgh)) - out <- rep(0, length(v2)) - if (any(z > t1)) { - f <- which(z > t1) - v <- x[f] - if (v < median(x)) { - xa <- (v2 <= v) * 1 - out <- out + xa - } - if (v > median(x)) { - xb <- (v2 >= v) * 1 - out <- out + xb - } - } else { - out <- out - } - return(which(out == 1)) -} - -#' Calculate amount of data for each training dataset in a given partition -#' -#' @noRd -#' -n_training <- function(data, partition) { - . 
<- partt <- NULL - if (any(c("train", "train-test", "test") - %in% - (data %>% - dplyr::select(dplyr::starts_with({{partition}})) %>% - dplyr::pull() %>% - unique()))) { - nn_part <- data %>% - dplyr::select(dplyr::starts_with({{partition}})) %>% - apply(., 2, table) %>% - data.frame() - nn_part <- nn_part %>% dplyr::mutate(partt = rownames(nn_part)) - nn_part$partt[grepl("train", nn_part$partt)] <- "train" - nn_part <- nn_part %>% - dplyr::filter(partt == "train") %>% - dplyr::select(-partt) - nn_part <- colSums(nn_part) - } else { - data <- data %>% - dplyr::select(dplyr::starts_with({{partition}})) - - nn_part <- list() - for(ppp in 1:ncol(data)){ - nn_part[[ppp]] <- data %>% dplyr::pull(ppp) %>% table() %>% c() - sm <- nn_part[[ppp]] %>% sum() - nn_part[[ppp]] <- sapply(nn_part[[ppp]], function(x) sum(sm-x)) - } - nn_part <- unlist(nn_part) - } - return(nn_part) -} - -#' Calculate number of coefficient for gam models -#' -#' @noRd -#' -n_coefficients <- function(data, predictors, predictors_f = NULL, k = 10){ - data <- data.frame(data) - if(k<0){ - k=10 - } - if(!is.null(predictors_f)){ - n_levels <- rep(NA, length(predictors_f)) - for(fff in 1:length(predictors_f)){ - n_levels[fff] <- unique(data[, predictors_f]) %>% na.omit() %>% length() - } - n_levels <- sum(n_levels) - }else{ - n_levels <- 0 - } - n <- (k - 1) * length(predictors) + n_levels - return(n) -} - -#' Euclidean distance for extrapolation -#' -#' @noRd -#' -euc_dist <- function(x, y) { - if (!methods::is(x, "matrix")) { - x <- as.matrix(x) - } - if (!methods::is(y, "matrix")) { - y <- as.matrix(y) - } - result <- matrix(0, nrow = nrow(x), ncol = nrow(y)) - for (ii in 1:nrow(y)) { - result[, ii] <- sqrt(colSums((t(x) - y[ii, ]) ^ 2)) - } - rownames(result) <- rownames(x) - colnames(result) <- rownames(y) - return(result) -} - -#' Moran I, based on ape package -#' -#' @noRd -#' -morani <- function(x, weight, na.rm = FALSE, scaled = TRUE) { - if (dim(weight)[1] != dim(weight)[2]) { - stop("'weight' must be a square matrix") - } - n <- length(x) - if (dim(weight)[1] != n) { - stop("'weight' must have as many rows as observations in 'x'") - } - ei <- -1 / (n - 1) - nas <- is.na(x) - if (any(nas)) { - if (na.rm) { - x <- x[!nas] - n <- length(x) - weight <- weight[!nas, !nas] - } else { - warning("'x' has missing values: maybe you wanted to set na.rm = TRUE?") - return(NA) - } - } - rs <- rowSums(weight) - rs[rs == 0] <- 1 - weight <- weight / rs - s <- sum(weight) - m <- mean(x) - y <- x - m - cv <- sum(weight * y %o% y) - v <- sum(y^2) - res <- (n / s) * (cv / v) - if (scaled) { - imax <- (n / s) * (sd(rowSums(weight) * y) / sqrt(v / (n - 1))) - res <- res / imax - } - - return(res) -} - -#' Mahalanobis distance -#' -#' @noRd -#' -mah_dist <- function(x, y, cov) { - if (!methods::is(x, "matrix")) { - x <- as.matrix(x) - } - if (!methods::is(y, "matrix")) { - y <- as.matrix(y) - } - result <- matrix(0, nrow = nrow(x), ncol = nrow(y)) - for (ii in 1:nrow(y)) { - # root square of squared Mahalanobis distance - result[, ii] <- sqrt(mahalanobis(x = x, center = y[ii, ], cov = cov)) - } - rownames(result) <- rownames(x) - colnames(result) <- rownames(y) - return(result) -} diff --git a/R/auxiliary_graf.R b/R/auxiliary_graf.R index 57eb62a..7c637b6 100644 --- a/R/auxiliary_graf.R +++ b/R/auxiliary_graf.R @@ -11,844 +11,6 @@ # https://doi.org/10.1111/2041-210X.12523 -# density and gradient of the prior over the log hyperparameters -theta.prior <- function(theta, pars) { - if (any(is.na(pars))) { - return(list( - density = 0, - 
gradient = rep(0, length(theta)) - )) - } - - density <- sum(stats::dnorm(theta, pars[1], pars[2], log = TRUE)) - - gradient <- (pars[1] - theta) / pars[2]^2 - - return(list( - density = density, - gradient = gradient - )) -} - -# psiline -psiline <- - function(s, adiff, a, K, y, d0, mn = 0, wt) { - a <- a + s * as.vector(adiff) - f <- K %*% a + mn - psi(a, f, mn, y, d0, wt) - } - -# psi -psi <- - function(a, f, mn, y, d0, wt) { - 0.5 * t(a) %*% (f - mn) - sum(wt * d0(f, y)) - } - - -# moments of the standard normal distribution for EP on the probit GP -ep.moments <- function(y, sigma2, mu) { - - # 1 + sigma^{2} - sigma2p1 <- 1 + sigma2 - - # get the likelihood - z <- y * (mu / sqrt(sigma2p1)) - - # get cdf and pdf of the likelihood - cdf_z <- stats::pnorm(z) - pdf_z <- stats::dnorm(z) - - # new \hat{\mu} (mean; second moment) - muhat <- mu + (y * sigma2 * pdf_z) / cdf_z - - # new \hat{\sigma^{2}} (variance; third moment) - sigma2hat <- sigma2 - - (sigma2^2 * pdf_z) / - (sigma2p1 * cdf_z) * - (z + pdf_z / cdf_z) - - # log of the 0th moment (cdf) - logM0 <- log(cdf_z) - - # return the three elements - return(list( - logM0 = logM0, - muhat = muhat, - sigma2hat = sigma2hat - )) -} - -# update.ep -update.ep <- - function(i, y, mn, lis) { - # update at index \code{i} for the EP approximation given \code{y} - # (on +1, -1 scale), \code{mn} (on the Gaussian scale) and the current - # list of parameters \code{lis}. - - Sigma <- lis$Sigma - ttau <- lis$ttau - tnu <- lis$tnu - mu <- lis$mu - - # calculate approximate cavity parameters \nu_{-i} and \tau_{-i} - # \sigma^{2}_i = \Sigma_{i, i} - # \tau_{-i} = \sigma^{-2}_i - \tilde{\tau}_i - # \nu_{-i} = \mu_i / \sigma^{2}_i + m_i * \tau_{-i} - \tilde{\nu}_i - sig2_i <- Sigma[i, i] - tau_ni <- 1 / sig2_i - ttau[i] - nu_ni <- mu[i] / sig2_i + mn[i] * tau_ni - tnu[i] - - # compute marginal moments \hat{\mu}_i and \hat{\sigma}_i^2 - # calculate required derivatives of individual log partition function - z <- nu_ni / tau_ni / (sqrt(1 + 1 / tau_ni)) - yz <- y[i] * z - lZ <- stats::pnorm(yz, log.p = TRUE) - n_p <- stats::dnorm(yz) / exp(lZ) - dlZ <- y[i] * n_p / sqrt(1 + 1 / tau_ni) - d2lZ <- -n_p * (yz + n_p) / (1 + 1 / tau_ni) - - # save old \tilde{\tau} before finding new one - ttau_old <- ttau[i] - - # update \tilde{\tau}_i, forcing it non-negative - # then update \tilde{\nu}_i - ttau[i] <- max(-d2lZ / (1 + d2lZ / tau_ni), 0) - tnu[i] <- (dlZ + (mn[i] - nu_ni / tau_ni) * d2lZ) / (1 + d2lZ / tau_ni) - - # rank-1 update \Sigma and \mathbf{\mu} - # \delta\sigma^2 - ds2 <- ttau[i] - ttau_old - # get column i - # si <- Sigma[, i] - # recompute\Sigma \& \mu - lis <- update.sigma.mu(Sigma, ds2, i, tnu) - - # return update list of parameters - return(list( - Sigma = lis$Sigma, - ttau = ttau, - tnu = tnu, - mu = lis$mu - )) - } - -# update.sigma.mu -update.sigma.mu <- - function(Sigma, ds2, i, tnu) { - # this takes 70% of total time in gpml Matlab code - # tried re-coding in C++, but all the cost is the matrx algebra, - # so little improvement. The approach is inherently slow. 
- - # get column i - si <- Sigma[, i] - # recalculate Sigma - Sigma <- Sigma - ds2 / (1 + ds2 * si[i]) * si %*% t(si) - # and mu - mu <- Sigma %*% tnu - - return(list( - Sigma = Sigma, - mu = mu - )) - } - -# cov.SE -cov.SE <- function(x1, x2 = NULL, e1 = NULL, e2 = NULL, l) { - n1 <- nrow(x1) - n2 <- ifelse(is.null(x2), n1, nrow(x2)) - n3 <- ncol(x1) - - # distance matrices - if (is.null(x2)) { - e2 <- e1 - # if no second matrix do with distance matrices for speed up - dists <- lapply(1:n3, function(i, x) dist(x[, i])^2, x1) - } else { - dists <- list() - for (i in 1:n3) { - dists[[i]] <- x1[, i]^2 %*% t(rep(1, n2)) + - rep(1, n1) %*% t(x2[, i]^2) - 2 * x1[, i] %*% t(x2[, i]) - } - } - - # with error matrices - if (!is.null(e1)) { - E1 <- list() - ones <- t(rep(1, n2)) - for (i in 1:n3) { - E1[[i]] <- e1[, i] %*% ones - } - - if (!is.null(e2)) { - E2 <- list() - ones <- t(rep(1, n1)) - for (i in 1:n3) { - E2[[i]] <- t(e2[, i] %*% ones) - } - } else { - E2 <- as.list(rep(0, n3)) - } - - # run through each covariate - - sumdiffs <- 0 - denom <- 1 - lower <- lower.tri(E1[[1]]) - for (i in 1:n3) { - err <- E1[[i]] + E2[[i]] - if (is.null(x2)) { - err <- err[lower] # save only lower portion for speed up - } - sumdiffs <- sumdiffs + dists[[i]] / (err + l[i]) - denom <- denom * (1 + err / l[i]) - } - # inverse kronecker delta - ikds <- as.numeric(sumdiffs > 0) - diag(ikds <- 1) - denom <- sqrt(denom) * ikds - K <- exp(-0.5 * sumdiffs) / denom - } else { - # without error matrices - sumdiffs <- 0 - for (i in 1:n3) { - sumdiffs <- sumdiffs + dists[[i]] / l[i] - } - K <- exp(-0.5 * sumdiffs) # to matrix? - } - - if (class(sumdiffs)[1] == "dist") { - K <- as.matrix(K) - diag(K) <- 1 - } - K -} - -# cov.SE.d1 -cov.SE.d1 <- function(x, e = NULL, l) { - # get gradients (matrices) of the kernel wrt. the parameters - # CURRENTLY IGNORES e!! 
- - # number of parameters - n <- length(l) - - # assign vector for gradients - grads <- list() - - # get full covariance matrix - K <- cov.SE(x1 = x, e1 = e, l = l) - - # loop through them - for (i in 1:n) { - - # squared distances - d2_i <- as.matrix(dist(x[, i])^2) - - # gradient for each parameter - grads[[i]] <- K * (1 / l[i]^2) * d2_i / 2 - } - # return as a list - return(grads) -} - -# d0 -d0 <- - function(z, y) { - if (length(y) != length(z)) y <- rep(y, length(z)) - pr <- y > 0 & y < 1 - npr <- !pr - ans <- vector("numeric", length(y)) - y[pr] <- stats::qnorm(y[pr]) - y[npr] <- 2 * y[npr] - 1 - ans[pr] <- stats::dnorm(y[pr], z[pr], log = TRUE) - ans[npr] <- stats::pnorm(y[npr] * z[npr], log.p = TRUE) - ans - } - -# d1 -d1 <- - function(z, y) { - pr <- y > 0 & y < 1 - npr <- !pr - ans <- vector("numeric", length(y)) - y[pr] <- stats::qnorm(y[pr]) - y[npr] <- 2 * y[npr] - 1 - ans[pr] <- y[pr] - z[pr] - ans[npr] <- y[npr] * stats::dnorm(z[npr]) / stats::pnorm(y[npr] * z[npr]) - ans - } - -# d2 -d2 <- - function(z, y) { - pr <- y > 0 & y < 1 - npr <- !pr - ans <- vector("numeric", length(y)) - y[npr] <- 2 * y[npr] - 1 - ans[pr] <- -1 - a <- stats::dnorm(z[npr])^2 / stats::pnorm(y[npr] * z[npr])^2 - b <- y[npr] * z[npr] * stats::dnorm(z[npr]) / stats::pnorm(y[npr] * z[npr]) - ans[npr] <- -a - b - ans - } - -# d3 -d3 <- function(z, y) { - pr <- y > 0 & y < 1 - npr <- !pr - ans <- vector("numeric", length(y)) - y[npr] <- 2 * y[npr] - 1 - ans[pr] <- 0 - n <- stats::dnorm(z[npr]) - p <- stats::pnorm(y[npr] * z[npr]) - f <- z[npr] - a <- n / p^3 - b <- f * (y[npr]^2 + 2) * n * p - c <- y[npr] * (f^2 - 1) * p^2 + 2 * y[npr] * n^2 - ans[npr] <- a * (b + c) - ans -} - - -# define the objective and gradient functions to optimise the hyperparameters -# of a GRaF model -objective <- function(theta, prior.pars, isfac, args, fun) { - - # unpack theta - l <- ifelse(isfac, 0.01, NA) - l[!isfac] <- exp(theta) - - # set the lengthscales - args$l <- l - - # run the model - m <- do.call(fun, args) - - # log likelihood and prior - llik <- -m$mnll - lpri <- theta.prior(theta, prior.pars)$density - - # log posterior - lpost <- llik + lpri - - # and objective - objective <- -lpost - - return(objective) -} - - -gradient <- function(theta, prior.pars, isfac, args, fun) { - - # unpack theta - l <- ifelse(isfac, 0.01, NA) - l[!isfac] <- exp(theta) - - # set the lengthscales - args$l <- l - - # run the model - m <- do.call(fun, args) - - # gradient of llik w.r.t. l - dLdl <- m$l_grads[!isfac] - - # gradient of l w.r.t. theta - dldtheta <- exp(theta) - - # gradient of lpri w.r.t. theta - dpdtheta <- theta.prior(theta, prior.pars)$gradient - - # gradient of llik w.r.t. theta - dLdtheta <- dLdl * dldtheta - - # gradient of lpost w.r.t. theta - dPdtheta <- dLdtheta + dpdtheta - - # gradient of objective w.r.t. lpost - dOdP <- -1 - - # gradient of objective w.r.t. 
theta - dOdtheta <- dOdP * dPdtheta - - return(dOdtheta) -} - -optimise.graf <- function(args) { - # pass all the arguments of a call to graf, memoize and - # optimise the model, and return afitted version - - # set opt.l to FALSE - args[["opt.l"]] <- FALSE - - # memoise graf - # mgraf <- memoise(graf) - mgraf <- graf - - # set up initial lengthscales - k <- ncol(args$x) - l <- rep(1, k) - - # find factors and drop them from theta - notfacs <- 1:k - - facs <- which(unlist(lapply(args$x, is.factor))) - - if (length(facs) > 0) { - notfacs <- notfacs[-facs] - l[facs] <- 0.01 - } - - # log them - theta <- log(l[notfacs]) - - # logical vector of factors - isfac <- 1:k %in% facs - - # optimisation arguments - if (args$method == "Laplace") { - meth <- "L-BFGS-B" - grad <- gradient - } else { - meth <- "BFGS" - grad <- NULL - } - - low <- -Inf - up <- Inf - - opt <- stats::optim(theta, - fn = objective, - gr = grad, - prior.pars = args$theta.prior.pars, - isfac = isfac, - args = args, - fun = mgraf, - hessian = args$hessian, - lower = low, - upper = up, - method = meth, - control = args$opt.control - ) - - # get the resultant lengthscales - l[notfacs] <- exp(opt$par) - - args$l <- l - - # fit the final model and return - m <- do.call(mgraf, args) - - # replace hessian with the hessian matrix or NULL - if (args$hessian) { - m$hessian <- opt$hessian - } else { - m$hessian <- NULL - } - - # un-memoize graf - # forget(mgraf) - - return(m) -} - - -# graf.fit.laplace -graf.fit.laplace <- - function(y, x, mn, l, wt, e = NULL, tol = 10^-6, itmax = 50, - verbose = FALSE) { - if (is.vector(x)) x <- as.matrix(x) - mn <- stats::qnorm(mn) - n <- length(y) - - # create the covariance matrix - K <- cov.SE(x1 = x, e1 = e, e2 = NULL, l = l) - - # an identity matrix for the calculations - eye <- diag(n) - - # initialise - a <- rep(0, n) - f <- mn - obj.old <- Inf - obj <- -sum(wt * d0(f, y)) - it <- 0 - - # start newton iterations - while (obj.old - obj > tol & it < itmax) { - it <- it + 1 - obj.old <- obj - W <- -(wt * d2(f, y)) - rW <- sqrt(W) - cf <- f - mn - mat1 <- rW %*% t(rW) * K + eye - L <- tryCatch(chol(mat1), - error = function(x) { - return(NULL) - } - ) - b <- W * cf + wt * d1(f, y) - mat2 <- rW * (K %*% b) - adiff <- b - rW * backsolve(L, forwardsolve(t(L), mat2)) - a - dim(adiff) <- NULL - - # find optimum step size using Brent's method - res <- stats::optimise(psiline, c(0, 2), adiff, a, as.matrix(K), y, d0, mn, wt) - a <- a + res$minimum * adiff - f <- K %*% a + mn - obj <- psi(a, f, mn, y, d0, wt) - } - - # recompute key components - cf <- f - mn - W <- -(wt * d2(f, y)) - rW <- sqrt(W) - mat1 <- rW %*% t(rW) * K + eye - L <- tryCatch(chol(mat1), - error = function(x) { - return(NULL) - } - ) - - # return marginal negative log-likelihood - mnll <- (a %*% cf)[1, 1] / 2 + sum(log(diag(L)) - (wt * d0(f, y))) - - # get partial gradients of the objective wrt l - - # gradient components - W12 <- matrix(rep(rW, n), n) - R <- W12 * backsolve(L, forwardsolve(t(L), diag(rW))) - C <- forwardsolve(t(L), (W12 * K)) - - # partial gradients of the kernel - dK <- cov.SE.d1(x, e, l) - - # rate of change of likelihood w.r.t. 
the mode - s2 <- (diag(K) - colSums(C^2)) / 2 * d3(f, y) - - # vector to store gradients - l_grads <- rep(NA, length(l)) - - for (i in 1:length(l)) { - grad <- sum(R * dK[[i]]) / 2 - grad <- grad - (t(a) %*% dK[[i]] %*% a) / 2 - b <- dK[[i]] %*% d1(f, y) - grad <- grad - t(s2) %*% (b - K %*% (R %*% b)) - l_grads[i] <- -grad - } - - if (verbose) cat(paste(" ", it, "Laplace iterations\n")) - if (it == itmax) print("timed out, don't trust the inference!") - return(list( - y = y, x = x, MAP = f, ls = l, a = a, W = W, L = L, K = K, - e = e, obsx = x, obsy = y, mnll = mnll, wt = wt, - l_grads = l_grads - )) - } - -# graf.fit.ep -graf.fit.ep <- - function(y, x, mn, l, wt, e = NULL, tol = 1e-6, itmax = 50, itmin = 2, - verbose = FALSE) { - # fit a GRaF model using expectation-propagation - # as implemented in the gpml matlab library - # If parallel = TRUE, the EP uses the parallel update - # as implemented in GPStuff - - # whether to use parallel EP (rather than sequential) - parallel <- FALSE - - if (is.vector(x)) { - x <- as.matrix(x) - } - - # mn to probability scale - mn <- stats::qnorm(mn) - n <- length(y) - # covariance matrix - K <- cov.SE(x1 = x, e1 = e, e2 = NULL, l = l) - # identity matrix - eye <- diag(n) - - # convert observations to +1, -1, save 0, 1 version - oldy <- y - y <- y * 2 - 1 - - # initialise - - # \tilde{\mathbf{\nu}} = \mathbf{0} - tnu <- rep(0, n) - # \tilde{\mathbf{\tau}} = \mathbf{0} - ttau <- rep(0, n) - # \mathbf{\mu} = \mathbf{0} - mu <- rep(0, n) - # \Sigma = \mathbf{K} (only used in sequential EP) - Sigma <- K - - # calculate marginal negative log likelihood at ttau = tnu = mu = 0s - z <- mu / sqrt(1 + diag(K)) - mnll <- -sum(stats::pnorm(y * z), log.p = TRUE) - - # ~~~~~~~~~~~~~~~~~~~~~ - # set up for EP algorithm - # set up damping factor (for parallel EP) as in GPStuff - df <- 0.8 - - # set old mnll to Inf & start iteration counter - mnll_old <- Inf - logM0_old <- logM0 <- rep(0, n) - it <- 1 - converged <- FALSE - - while (!converged) { - if (parallel) { - - # calculate key parameters - dSigma <- diag(Sigma) - tau <- 1 / dSigma - ttau - nu <- 1 / dSigma * mn - tnu - mu <- nu / tau - sigma2 <- 1 / tau - - # get marginal moments of posterior - lis <- ep.moments(y, sigma2, mu) - - # recalculate parameters from these moments - # \delta\tilde{\tau} (change in \tilde{\tau}) - dttau <- 1 / lis$sigma2hat - tau - ttau - - # \tilde{\tau} - ttau <- ttau + df * dttau - - # \delta\tilde{\nu} (change in \tilde{\nu}) - dtnu <- 1 / lis$sigma2hat * lis$muhat - nu - tnu - - # \tilde{\nu} - tnu <- tnu + df * dtnu - } else { - # otherwise use sequential EP - - lis <- list( - Sigma = Sigma, - ttau = ttau, - tnu = tnu, - mu = mu - ) - - # cycle through in random order - for (i in sample(1:n)) { - lis <- update.ep(i, y, mn, lis) - } # end permuted for loop - - Sigma <- lis$Sigma - ttau <- lis$ttau - tnu <- lis$tnu - mu <- lis$mu - - sigma2 <- diag(Sigma) - } # end parallel / sequential - - # recompute the approximate posterior parameters \Sigma and \mathbf{\mu} - # using eq. 3.53 and eq. 3.68 - - # sW = \tilde{S}^{\frac{1}{2}} = \sqrt{\mathbf{\tilde{\tau}}} - sW <- sqrt(ttau) - - # L = cholesky(I_n + \tilde{S}^{\frac{1}{2}} * K * \tilde{S}^{\frac{1}{2}}) - L <- chol(sW %*% t(sW) * K + eye) - - # V = L^T \\ \tilde{S}^{\frac{1}{2}} * K - sWmat <- matrix(rep(sW, n), n) # byrow = TRUE? 
- V <- backsolve(L, sWmat * K, transpose = TRUE) - - # \Sigma = \mathbf{K} - \mathbf{V}^T \mathbf{V} - Sigma <- K - t(V) %*% V - - # \mathbf{\mu} = \Sigma \tilde{\mathbf{\nu}} - mu <- Sigma %*% tnu # + mn - - # calculate new mnll and assess convergence - # compute logZ_{EP} using eq. 3.65, 3.73 and 3.74 and the existing L - # \mathbf{\sigma}^2 = diag(\Sigma) - sigma2 <- diag(Sigma) - tau_n <- 1 / sigma2 - ttau - nu_n <- mu / sigma2 - tnu + mn * tau_n - - z <- nu_n / tau_n / (sqrt(1 + 1 / tau_n)) - lZ <- stats::pnorm(y * z, log.p = TRUE) - - # split the final equation up into 5 bits... - mnll.a <- sum(log(diag(L))) - sum(lZ) - t(tnu) %*% Sigma %*% tnu / 2 - mnll.b <- t(nu_n - mn * tau_n) - mnll.c <- ((ttau / tau_n * (nu_n - mn * tau_n) - 2 * tnu) / (ttau + tau_n)) / 2 - mnll.d <- sum(tnu^2 / (tau_n + ttau)) / 2 - mnll.e <- sum(log(1 + ttau / tau_n)) / 2 - - mnll <- as.numeric(mnll.a - mnll.b %*% mnll.c + mnll.d - mnll.e) - - # improvement in negative log marginal likelihood - dmnll <- abs(mnll - mnll_old) - - # improvement in log of the 0th moment - dlogM0 <- max(abs(logM0 - logM0_old)) - - # both under tolerance? - sub_tol <- dmnll < tol & dlogM0 < tol - - if ((sub_tol & it >= itmin) | it >= itmax) { - # stop if there was little improvement and there have been at least - # itmin iterations or if there have been itmax or more - # iterations - converged <- TRUE - } else { - it <- it + 1 - mnll_old <- mnll - } - } # end while loop - - # throw an error if the iterations maxed out before convergence - if (it >= itmax) { - stop(paste0("maximum number of iterations (", itmax, ") reached - without convergence")) - } - - # calculate posterior parameters - sW <- sqrt(ttau) - alpha <- tnu - sW * backsolve(L, backsolve(L, sW * (K %*% tnu), transpose = TRUE)) - f <- crossprod(K, alpha) + mn - - # return relevant parameters - return(list( - y = oldy, - x = x, - MAP = f, - ls = l, - a = alpha, - W = ttau, - L = L, - K = K, - e = e, - obsx = x, - obsy = oldy, - mnll = mnll, - wt = wt, - l_grads = NULL - )) - } - -graf <- - function(y, x, error = NULL, weights = NULL, prior = NULL, l = NULL, opt.l = FALSE, - theta.prior.pars = c(log(10), 1), hessian = FALSE, opt.control = list(), - verbose = FALSE, method = c("Laplace", "EP")) { - method <- match.arg(method) - - # optionally optimise graf (by recursively calling this function) - if (opt.l) { - - # get all visible object as a list - args <- capture.all() - - # get the expected objects - expected_args <- names(formals(graf)) - - # remove any unexpected arguments - args <- args[names(args) %in% expected_args] - - # pass this to optimiser - fit <- optimise.graf(args) - - # skip out of this function and return - return(fit) - } - - - if (!is.data.frame(x)) stop("x must be a dataframe") - - # convert any ints to numerics - for (i in 1:ncol(x)) if (is.integer(x[, i])) x[, i] <- as.numeric(x[, i]) - - obsx <- x - k <- ncol(x) - n <- length(y) - - if (is.null(weights)) { - # if weights aren't provided - weights <- rep(1, n) - } else { - # if they are, run some checks - # throw an error if weights are specified with EP - if (method == "EP") { - stop("weights are not implemented for the EP algorithm (yet)") - } - # or if any are negative - if (any(weights < 0)) { - stop("weights must be positive or zero") - } - } - - # find factors and convert them to numerics - notfacs <- 1:k - facs <- which(unlist(lapply(x, is.factor))) - if (length(facs) > 0) notfacs <- notfacs[-facs] - for (fac in facs) { - x[, fac] <- as.numeric(x[, fac]) - } - x <- as.matrix(x) - - # scale the 
matrix, retaining scaling - scaling <- apply(as.matrix(x[, notfacs]), 2, function(x) c(mean(x), stats::sd(x))) - for (i in 1:length(notfacs)) { - x[, notfacs[i]] <- (x[, notfacs[i]] - scaling[1, i]) / scaling[2, i] - } - - # set up the default prior, if not specified - exp.prev <- sum(weights[y == 1]) / sum(weights) - if (is.null(prior)) { - mnfun <- function(x) rep(exp.prev, nrow(x)) - } else { - mnfun <- prior - } - - # give an approximation to l, if not specified (or optimised) - if (is.null(l)) { - l <- rep(0.01, k) - l[notfacs] <- apply(x[y == 1, notfacs, drop = FALSE], 2, sd) * 8 - l[l == 0] <- 1 - } - - # calculate mean (on unscaled data and probability scale) - mn <- mnfun(obsx) - - # fit model - if (method == "Laplace") { - # by Laplace approximation - fit <- graf.fit.laplace(y = y, x = as.matrix(x), mn = mn, l = l, wt = weights, e = error, verbose = verbose) - } else { - # or using the expectation-propagation algorithm - fit <- graf.fit.ep(y = y, x = as.matrix(x), mn = mn, l = l, wt = weights, e = error, verbose = FALSE) - } - - fit$mnfun <- mnfun - fit$obsx <- obsx - fit$facs <- facs - fit$hessian <- hessian - fit$scaling <- scaling - fit$peak <- obsx[which(fit$MAP == max(fit$MAP))[1], ] - class(fit) <- "graf" - fit - } - -capture.all <- function() { - # capture all visible objects in the parent environment and pass to a list - env <- parent.frame() - object_names <- objects(env) - objects <- lapply(object_names, - get, - envir = env - ) - names(objects) <- object_names - return(objects) -} - # pred pred <- function(predx, fit, mn, std = TRUE, maxn = 250, same = FALSE) {
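# ------------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the continuous Boyce index that
# the removed boyce() helper computed. Presence and background predictions are
# binned into overlapping windows, the predicted-to-expected (P/E) ratio is
# taken per bin, and the Spearman correlation between P/E and the bin midpoints
# is returned. Function and argument names below are assumptions for
# illustration only, not the package's API.
boyce_sketch <- function(pres, contrast, n_bins = 101, n_width = 0.1) {
  rng <- range(c(pres, contrast), na.rm = TRUE)
  window <- n_width * diff(rng)
  lows <- seq(rng[1], rng[2] - window, length.out = n_bins)
  highs <- lows + window
  mid <- (lows + highs) / 2

  # proportion of presences (P) and background points (E) falling in each bin
  P <- vapply(seq_len(n_bins),
              function(i) mean(pres >= lows[i] & pres <= highs[i], na.rm = TRUE),
              numeric(1))
  E <- vapply(seq_len(n_bins),
              function(i) mean(contrast >= lows[i] & contrast <= highs[i], na.rm = TRUE),
              numeric(1))

  # drop empty bins, then correlate P/E with the bin midpoints
  keep <- P > 0 & E > 0
  stats::cor(mid[keep], P[keep] / E[keep], method = "spearman")
}

# Example: scores that rank presences above background should give a value
# close to 1.
# set.seed(1)
# boyce_sketch(pres = runif(200, 0.4, 1), contrast = runif(1000, 0, 1))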