PR #188: extend gpdfit to return marginal posterior of k (#186)

Closed. Wants to merge 3 commits.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -51,5 +51,5 @@ Suggests:
VignetteBuilder: knitr
Encoding: UTF-8
SystemRequirements: pandoc (>= 1.12.3), pandoc-citeproc
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
Roxygen: list(markdown = TRUE)
4 changes: 4 additions & 0 deletions NAMESPACE
@@ -75,6 +75,7 @@ export(.ndraws)
export(.thin_draws)
export(E_loo)
export(compare)
export(dgpd)
export(elpd)
export(example_loglik_array)
export(example_loglik_matrix)
@@ -115,12 +116,15 @@ export(pareto_k_ids)
export(pareto_k_influence_values)
export(pareto_k_table)
export(pareto_k_values)
export(pgpd)
export(print_dims)
export(pseudobma_weights)
export(psis)
export(psis_n_eff_values)
export(psislw)
export(qgpd)
export(relative_eff)
export(rgpd)
export(sis)
export(stacking_weights)
export(tis)
102 changes: 102 additions & 0 deletions R/gdp.R
@@ -0,0 +1,102 @@
#' The Generalized Pareto Distribution
#'
#' Density, distribution function, quantile function and random generation
#' for the generalized Pareto distribution with location equal to \code{mu},
#' scale equal to \code{sigma}, and shape equal to \code{k}.
#'
#' @param x,q vector of quantiles.
#' @param p vector of probabilities.
#' @param n number of observations. If \code{length(n) > 1}, the length is taken
#' to be the number required.
#' @param mu scalar location parameter.
#' @param sigma scalar, positive scale parameter.
#' @param k scalar shape parameter.
#' @param log,log.p logical; if TRUE, probabilities p are given as log(p).
#' @param lower.tail logical; if TRUE (default), probabilities are \eqn{P[X \le x]},
#' otherwise \eqn{P[X > x]}.
#'
#' @name GPD
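
For reference (editorial note, not part of the diff): with this \(\mu, \sigma, k\) parametrization and \(k \neq 0\), the functions below implement

\[
f(x) = \frac{1}{\sigma}\Big(1 + k\,\frac{x-\mu}{\sigma}\Big)^{-(1/k + 1)}, \qquad
F(x) = 1 - \Big(1 + k\,\frac{x-\mu}{\sigma}\Big)^{-1/k}, \qquad
F^{-1}(p) = \mu + \frac{\sigma}{k}\Big[(1 - p)^{-k} - 1\Big],
\]

on the support \(x > \mu\) with \(1 + k(x-\mu)/\sigma > 0\); the \(k = 0\) case reduces to an exponential distribution with rate \(1/\sigma\) shifted by \(\mu\).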


#' @rdname GPD
#' @export
dgpd <- function(x, mu = 0, sigma = 1, k = 0, log = FALSE) {
stopifnot(length(mu) == 1 && length(sigma) == 1 && length(k) == 1)
if (is.na(sigma) || sigma <= 0) {
return(rep(NaN, length(x)))
}
d <- (x - mu) / sigma
ind <- (d > 0) & ((1 + k * d) > 0)
ind[is.na(ind)] <- FALSE
if (k == 0) {
d[ind] <- -d[ind] - log(sigma)
} else {
d[ind] <- log1p(k * d[ind]) * -(1 / k + 1) - log(sigma)
}
d[!ind] <- -Inf
if (!log) {
d <- exp(d)
}
d
}


#' @rdname GPD
#' @export
pgpd <- function(q, mu = 0, sigma = 1, k = 0, lower.tail = TRUE, log.p = FALSE) {
stopifnot(length(mu) == 1 && length(sigma) == 1 && length(k) == 1)
if (is.na(sigma) || sigma <= 0) {
return(rep(NaN, length(q)))
}
q <- pmax(q - mu, 0) / sigma
if (k == 0) {
p <- 1 - exp(-q)
} else {
p <- -expm1(log(pmax(1 + k * q, 0)) * -(1 / k))
}
if (!lower.tail) {
p <- 1 - p
}
if (log.p) {
p <- log(p)
}
p
}

#' @rdname GPD
#' @export
qgpd <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE, log.p = FALSE) {
stopifnot(length(mu) == 1 && length(sigma) == 1 && length(k) == 1)
if (is.na(sigma) || sigma <= 0) {
return(rep(NaN, length(p)))
}
if (log.p) {
p <- exp(p)
}
if (!lower.tail) {
p <- 1 - p
}
if (k == 0) {
q <- mu - sigma * log1p(-p)
} else {
q <- mu + sigma * expm1(-k * log1p(-p)) / k
}
q
}

#' @rdname GPD
#' @export
rgpd <- function(n, mu = 0, sigma = 1, k = 0) {
stopifnot(
length(n) == 1 && length(mu) == 1 && length(sigma) == 1 && length(k) == 1
)
if (is.na(sigma) || sigma <= 0) {
return(rep(NaN, n))
}
if (k == 0) {
r <- mu + sigma * rexp(n)
} else {
r <- mu + sigma * expm1(-k * log(runif(n))) / k
}
r
}
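
Illustration only (editorial sketch, not part of the diff): a few consistency checks for the d/p/q/r functions added above, using the signatures shown in this file; the parameter values are arbitrary.

# quantile -> probability round trip, density, and a Monte Carlo check
p <- c(0.1, 0.5, 0.9)
q <- qgpd(p, mu = 0, sigma = 2, k = 0.3)
pgpd(q, mu = 0, sigma = 2, k = 0.3)   # recovers p
dgpd(q, mu = 0, sigma = 2, k = 0.3)   # density at those quantiles
set.seed(1)
r <- rgpd(1e5, mu = 0, sigma = 2, k = 0.3)
mean(r <= q[2])                       # close to 0.5, since q[2] is the median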
73 changes: 22 additions & 51 deletions R/gpdfit.R
@@ -17,7 +17,7 @@
#' algorithm is to sort the elements of `x`. If `x` is already
#' sorted in ascending order then `sort_x` can be set to `FALSE` to
#' skip the initial sorting step.
#' @return A named list with components `k` and `sigma`.
#' @return A named list with components `k_hat`, `sigma_hat`, `k`, `k_w`, and `k_d`.
#'
#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang &
#' Stephens (2009).
@@ -29,7 +29,7 @@
#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
#'
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
# See section 4 of Zhang and Stephens (2009)
# see section 4 of Zhang and Stephens (2009)
if (sort_x) {
x <- sort.int(x)
}
@@ -39,61 +39,32 @@ gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
jj <- seq_len(M)
xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
l_theta <- N * lx(theta, x) # profile log-lik
k <- matrixStats::rowMeans2(log1p(-theta %o% x))
l_theta <- N * (log(-theta / k) - k - 1) # profile log-lik
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
theta_hat <- sum(theta * w_theta)
k <- mean.default(log1p(-theta_hat * x))
sigma <- -k / theta_hat
k_hat <- mean.default(log1p(-theta_hat * x))
sigma_hat <- -k_hat / theta_hat

if (wip) {
k <- adjust_k_wip(k, n = N)
}
# quadrature weights for k are same as for theta
k_w <- w_theta
# quadrature weights are just the normalized likelihoods
# we get the unnormalized posterior by multiplying these by the prior
k_d <- k_w * dgpd(-theta, mu = -1 / x[N], sigma = 1 / prior / xstar, k = 0.5)
# normalize using the trapezoidal rule
Z <- sum((k_d[-M] + k_d[-1]) * (k[-M] - k[-1])) / 2
k_d <- k_d / Z

if (is.nan(k)) {
k <- Inf
# adjust k_hat based on weakly informative prior, Gaussian centered on 0.5.
# this stabilizes estimates for very small Monte Carlo sample sizes and low neff
if (wip) {
k_hat <- (k_hat * N + 0.5 * 10) / (N + 10)
}

nlist(k, sigma)
}


# internal ----------------------------------------------------------------

lx <- function(a,x) {
a <- -a
k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1))
log(a / k) - k - 1
}

#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This
#' will stabilize estimates for very small Monte Carlo sample sizes and low neff
#' cases.
#'
#' @noRd
#' @param k Scalar khat estimate.
#' @param n Integer number of tail samples used to fit GPD.
#' @return Scalar adjusted khat estimate.
#'
adjust_k_wip <- function(k, n) {
a <- 10
n_plus_a <- n + a
k * n / n_plus_a + a * 0.5 / n_plus_a
}


#' Inverse CDF of generalized pareto distribution
#' (assuming location parameter is 0)
#'
#' @noRd
#' @param p Vector of probabilities.
#' @param k Scalar shape parameter.
#' @param sigma Scalar scale parameter.
#' @return Vector of quantiles.
#'
qgpd <- function(p, k, sigma) {
if (is.nan(sigma) || sigma <= 0) {
return(rep(NaN, length(p)))
if (is.na(k_hat)) {
k_hat <- Inf
sigma_hat <- NaN
}

sigma * expm1(-k * log1p(-p)) / k
nlist(k_hat, sigma_hat, k, k_w, k_d)
}
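
Illustration only (editorial sketch, not part of the diff): one way the new return components could be used to inspect the marginal posterior of k. Variable names (`tail_sample`, `draws`) and sizes are arbitrary; the components assumed are exactly those returned above.

set.seed(1)
tail_sample <- rexp(100)                       # stand-in for tail exceedances
fit <- gpdfit(tail_sample)
fit$k_hat                                      # point estimate (prior-adjusted when wip = TRUE)
sum(fit$k * fit$k_w)                           # quadrature approximation to the posterior mean of k
plot(fit$k, fit$k_d, type = "l")               # marginal posterior density of k over the grid
draws <- sample(fit$k, 4000, replace = TRUE, prob = fit$k_w)
quantile(draws, c(0.05, 0.5, 0.95))            # rough posterior interval for k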
21 changes: 10 additions & 11 deletions R/psis.R
@@ -97,10 +97,10 @@ psis.array <-
function(log_ratios, ...,
r_eff = NULL,
cores = getOption("mc.cores", 1)) {
importance_sampling.array(log_ratios = log_ratios, ...,
r_eff = r_eff,
cores = cores,
method = "psis")
importance_sampling.array(log_ratios = log_ratios, ...,
r_eff = r_eff,
cores = cores,
method = "psis")
}


@@ -245,14 +245,14 @@ psis_smooth_tail <- function(x, cutoff) {

# save time not sorting since x already sorted
fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
k <- fit$k
sigma <- fit$sigma
k <- fit$k_hat
sigma <- fit$sigma_hat
if (is.finite(k)) {
p <- (seq_len(len) - 0.5) / len
qq <- qgpd(p, k, sigma) + exp_cutoff
tail <- log(qq)
p <- (seq_len(len) - 0.5) / len
qq <- qgpd(p, sigma = sigma, k = k) + exp_cutoff
tail <- log(qq)
} else {
tail <- x
tail <- x
}
list(tail = tail, k = k)
}
@@ -385,4 +385,3 @@ throw_psis_r_eff_warning <- function() {
call. = FALSE
)
}

6 changes: 3 additions & 3 deletions R/psislw.R
@@ -73,10 +73,10 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4,
tail_ord <- order(x_tail)
exp_cutoff <- exp(cutoff)
fit <- gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80)
k <- fit$k
sigma <- fit$sigma
k <- fit$k_hat
sigma <- fit$sigma_hat
prb <- (seq_len(tail_len) - 0.5) / tail_len
qq <- qgpd(prb, k, sigma) + exp_cutoff
qq <- qgpd(prb, sigma = sigma, k = k) + exp_cutoff
smoothed_tail <- rep.int(0, tail_len)
smoothed_tail[tail_ord] <- log(qq)
x_new <- x
42 changes: 42 additions & 0 deletions man/GPD.Rd


2 changes: 1 addition & 1 deletion man/gpdfit.Rd


Binary file modified tests/testthat/reference-results/gpdfit.rds
Binary file modified tests/testthat/reference-results/gpdfit_default_grid.rds
Binary file modified tests/testthat/reference-results/gpdfit_old.rds
4 changes: 2 additions & 2 deletions tests/testthat/test_gpdfit.R
@@ -17,9 +17,9 @@ test_that("gpdfit returns correct result", {

test_that("qgpd returns the correct result ", {
probs <- seq(from = 0, to = 1, by = 0.25)
q1 <- qgpd(probs, k = 1, sigma = 1)
q1 <- qgpd(probs, k = 1)
expect_equal(q1, c(0, 1/3, 1, 3, Inf))

q2 <- qgpd(probs, k = 1, sigma = 0)
q2 <- qgpd(probs, sigma = 0, k = 1)
expect_true(all(is.nan(q2)))
})
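
A possible further check (editorial sketch, not part of the PR): the returned marginal density `k_d` should integrate to 1 over the `k` grid under the same trapezoidal rule used inside gpdfit. The test name and tolerance below are illustrative.

test_that("marginal posterior of k is normalized (sketch, not in PR)", {
  set.seed(123)
  fit <- gpdfit(rexp(1000))
  M <- length(fit$k)
  # trapezoidal integral of k_d over the k grid, mirroring the normalization in gpdfit
  Z <- sum((fit$k_d[-M] + fit$k_d[-1]) * (fit$k[-M] - fit$k[-1])) / 2
  expect_equal(Z, 1, tolerance = 1e-6)
})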