diff --git a/index.html b/index.html index 3ac2596..6a5afc3 100644 --- a/index.html +++ b/index.html @@ -1,3 +1,2 @@ - diff --git a/previews/PR100/.documenter-siteinfo.json b/previews/PR100/.documenter-siteinfo.json index 01ba3f8..eb03655 100644 --- a/previews/PR100/.documenter-siteinfo.json +++ b/previews/PR100/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.11.0","generation_timestamp":"2024-10-12T00:31:18","documenter_version":"1.7.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.11.1","generation_timestamp":"2024-11-29T18:49:19","documenter_version":"1.8.0"}} \ No newline at end of file diff --git a/previews/PR100/assets/documenter.js b/previews/PR100/assets/documenter.js index 82252a1..7d68cd8 100644 --- a/previews/PR100/assets/documenter.js +++ b/previews/PR100/assets/documenter.js @@ -612,176 +612,194 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) { }; } -// `worker = Threads.@spawn worker_function(documenterSearchIndex)`, but in JavaScript! -const filters = [ - ...new Set(documenterSearchIndex["docs"].map((x) => x.category)), -]; -const worker_str = - "(" + - worker_function.toString() + - ")(" + - JSON.stringify(documenterSearchIndex["docs"]) + - "," + - JSON.stringify(documenterBaseURL) + - "," + - JSON.stringify(filters) + - ")"; -const worker_blob = new Blob([worker_str], { type: "text/javascript" }); -const worker = new Worker(URL.createObjectURL(worker_blob)); - /////// SEARCH MAIN /////// -// Whether the worker is currently handling a search. This is a boolean -// as the worker only ever handles 1 or 0 searches at a time. -var worker_is_running = false; - -// The last search text that was sent to the worker. This is used to determine -// if the worker should be launched again when it reports back results. -var last_search_text = ""; - -// The results of the last search. 
This, in combination with the state of the filters -// in the DOM, is used compute the results to display on calls to update_search. -var unfiltered_results = []; - -// Which filter is currently selected -var selected_filter = ""; - -$(document).on("input", ".documenter-search-input", function (event) { - if (!worker_is_running) { - launch_search(); - } -}); - -function launch_search() { - worker_is_running = true; - last_search_text = $(".documenter-search-input").val(); - worker.postMessage(last_search_text); -} - -worker.onmessage = function (e) { - if (last_search_text !== $(".documenter-search-input").val()) { - launch_search(); - } else { - worker_is_running = false; - } - - unfiltered_results = e.data; - update_search(); -}; +function runSearchMainCode() { + // `worker = Threads.@spawn worker_function(documenterSearchIndex)`, but in JavaScript! + const filters = [ + ...new Set(documenterSearchIndex["docs"].map((x) => x.category)), + ]; + const worker_str = + "(" + + worker_function.toString() + + ")(" + + JSON.stringify(documenterSearchIndex["docs"]) + + "," + + JSON.stringify(documenterBaseURL) + + "," + + JSON.stringify(filters) + + ")"; + const worker_blob = new Blob([worker_str], { type: "text/javascript" }); + const worker = new Worker(URL.createObjectURL(worker_blob)); + + // Whether the worker is currently handling a search. This is a boolean + // as the worker only ever handles 1 or 0 searches at a time. + var worker_is_running = false; + + // The last search text that was sent to the worker. This is used to determine + // if the worker should be launched again when it reports back results. + var last_search_text = ""; + + // The results of the last search. This, in combination with the state of the filters + // in the DOM, is used compute the results to display on calls to update_search. 
+ var unfiltered_results = []; + + // Which filter is currently selected + var selected_filter = ""; + + $(document).on("input", ".documenter-search-input", function (event) { + if (!worker_is_running) { + launch_search(); + } + }); -$(document).on("click", ".search-filter", function () { - if ($(this).hasClass("search-filter-selected")) { - selected_filter = ""; - } else { - selected_filter = $(this).text().toLowerCase(); + function launch_search() { + worker_is_running = true; + last_search_text = $(".documenter-search-input").val(); + worker.postMessage(last_search_text); } - // This updates search results and toggles classes for UI: - update_search(); -}); + worker.onmessage = function (e) { + if (last_search_text !== $(".documenter-search-input").val()) { + launch_search(); + } else { + worker_is_running = false; + } -/** - * Make/Update the search component - */ -function update_search() { - let querystring = $(".documenter-search-input").val(); + unfiltered_results = e.data; + update_search(); + }; - if (querystring.trim()) { - if (selected_filter == "") { - results = unfiltered_results; + $(document).on("click", ".search-filter", function () { + if ($(this).hasClass("search-filter-selected")) { + selected_filter = ""; } else { - results = unfiltered_results.filter((result) => { - return selected_filter == result.category.toLowerCase(); - }); + selected_filter = $(this).text().toLowerCase(); } - let search_result_container = ``; - let modal_filters = make_modal_body_filters(); - let search_divider = `
`; + // This updates search results and toggles classes for UI: + update_search(); + }); - if (results.length) { - let links = []; - let count = 0; - let search_results = ""; - - for (var i = 0, n = results.length; i < n && count < 200; ++i) { - let result = results[i]; - if (result.location && !links.includes(result.location)) { - search_results += result.div; - count++; - links.push(result.location); - } - } + /** + * Make/Update the search component + */ + function update_search() { + let querystring = $(".documenter-search-input").val(); - if (count == 1) { - count_str = "1 result"; - } else if (count == 200) { - count_str = "200+ results"; + if (querystring.trim()) { + if (selected_filter == "") { + results = unfiltered_results; } else { - count_str = count + " results"; + results = unfiltered_results.filter((result) => { + return selected_filter == result.category.toLowerCase(); + }); } - let result_count = `
${count_str}
`; - search_result_container = ` + let search_result_container = ``; + let modal_filters = make_modal_body_filters(); + let search_divider = `
`; + + if (results.length) { + let links = []; + let count = 0; + let search_results = ""; + + for (var i = 0, n = results.length; i < n && count < 200; ++i) { + let result = results[i]; + if (result.location && !links.includes(result.location)) { + search_results += result.div; + count++; + links.push(result.location); + } + } + + if (count == 1) { + count_str = "1 result"; + } else if (count == 200) { + count_str = "200+ results"; + } else { + count_str = count + " results"; + } + let result_count = `
${count_str}
`; + + search_result_container = ` +
+ ${modal_filters} + ${search_divider} + ${result_count} +
+ ${search_results} +
+
+ `; + } else { + search_result_container = `
${modal_filters} ${search_divider} - ${result_count} -
- ${search_results} -
-
+
0 result(s)
+ +
No result found!
`; - } else { - search_result_container = ` -
- ${modal_filters} - ${search_divider} -
0 result(s)
-
-
No result found!
- `; - } + } - if ($(".search-modal-card-body").hasClass("is-justify-content-center")) { - $(".search-modal-card-body").removeClass("is-justify-content-center"); - } + if ($(".search-modal-card-body").hasClass("is-justify-content-center")) { + $(".search-modal-card-body").removeClass("is-justify-content-center"); + } - $(".search-modal-card-body").html(search_result_container); - } else { - if (!$(".search-modal-card-body").hasClass("is-justify-content-center")) { - $(".search-modal-card-body").addClass("is-justify-content-center"); + $(".search-modal-card-body").html(search_result_container); + } else { + if (!$(".search-modal-card-body").hasClass("is-justify-content-center")) { + $(".search-modal-card-body").addClass("is-justify-content-center"); + } + + $(".search-modal-card-body").html(` +
Type something to get started!
+ `); } + } - $(".search-modal-card-body").html(` -
Type something to get started!
- `); + /** + * Make the modal filter html + * + * @returns string + */ + function make_modal_body_filters() { + let str = filters + .map((val) => { + if (selected_filter == val.toLowerCase()) { + return `${val}`; + } else { + return `${val}`; + } + }) + .join(""); + + return ` +
+ Filters: + ${str} +
`; } } -/** - * Make the modal filter html - * - * @returns string - */ -function make_modal_body_filters() { - let str = filters - .map((val) => { - if (selected_filter == val.toLowerCase()) { - return `${val}`; - } else { - return `${val}`; - } - }) - .join(""); - - return ` -
- Filters: - ${str} -
`; +function waitUntilSearchIndexAvailable() { + // It is possible that the documenter.js script runs before the page + // has finished loading and documenterSearchIndex gets defined. + // So we need to wait until the search index actually loads before setting + // up all the search-related stuff. + if (typeof documenterSearchIndex !== "undefined") { + runSearchMainCode(); + } else { + console.warn("Search Index not available, waiting"); + setTimeout(waitUntilSearchIndexAvailable, 1000); + } } +// The actual entry point to the search code +waitUntilSearchIndexAvailable(); + }) //////////////////////////////////////////////////////////////////////////////// require(['jquery'], function($) { diff --git a/previews/PR100/index.html b/previews/PR100/index.html index a7592ad..d7a20d4 100644 --- a/previews/PR100/index.html +++ b/previews/PR100/index.html @@ -1,494 +1,34 @@ -Home · ParetoSmooth.jl - - - - - - -

ParetoSmooth

Documentation for ParetoSmooth.

ParetoSmooth.ModelComparisonType
ModelComparison

A struct containing the results of model comparison.

Fields

  • pointwise::KeyedArray: A KeyedArray of pointwise estimates. See [PsisLoo]@ref.
    • estimates::KeyedArray: A table containing the results of model comparison, with the following columns –
      • cv_elpd: The difference in total leave-one-out cross validation scores between models.
      • cv_avg: The difference in average LOO-CV scores between models.
      • weight: A set of Akaike-like weights assigned to each model, which can be used in pseudo-Bayesian model averaging.
    • std_err::NamedTuple: A named tuple containing the standard error of cv_elpd. Note that these estimators (incorrectly) assume all folds are independent, despite their substantial overlap, which creates a downward biased estimator. LOO-CV differences are not asymptotically normal, so these standard errors cannot be used to calculate a confidence interval.
    • gmpd::NamedTuple: The geometric mean of the predictive distribution. It equals the geometric mean of the probability assigned to each data point by the model, that is, exp(cv_avg). This measure is only meaningful for classifiers (variables with discrete outcomes). We can think of it as measuring how often the model was right: A model that always predicts incorrectly will have a GMPD of 0, while a model that always predicts correctly will have a GMPD of 1. However, the GMPD gives a model "Partial points" between 0 and 1 whenever the model assigns a probability other than 0 or 1 to the outcome that actually happened.

See also: PsisLoo

source
ParetoSmooth.PsisType
Psis{R<:Real, AT<:AbstractArray{R, 3}, VT<:AbstractVector{R}}

A struct containing the results of Pareto-smoothed importance sampling.

Fields

  • weights: A vector of smoothed, truncated, and normalized importance sampling weights.
  • pareto_k: Estimates of the shape parameter k of the generalized Pareto distribution.
  • ess: Estimated effective sample size for each LOO evaluation, based on the variance of the weights.
  • sup_ess: Estimated effective sample size for each LOO evaluation, based on the supremum norm, i.e. the size of the largest weight. More likely than ess to warn when importance sampling has failed. However, it can have a high variance.
  • r_eff: The relative efficiency of the MCMC chain, i.e. ESS / posterior sample size.
  • tail_len: Vector indicating how large the "tail" is for each observation.
  • posterior_sample_size: How many draws from an MCMC chain were used for PSIS.
  • data_size: How many data points were used for PSIS.
source
ParetoSmooth.PsisLooType
PsisLoo <: AbstractCV

A struct containing the results of leave-one-out cross validation computed with Pareto smoothed importance sampling.

Fields

  • estimates::KeyedArray: A KeyedArray with columns :total, :se_total, :mean, :se_mean, and rows :cv_elpd, :naive_lpd, :p_eff. See # Extended help for more.
    • :cv_elpd contains estimates for the out-of-sample prediction error, as estimated using leave-one-out cross validation.
    • :naive_lpd contains estimates of the in-sample prediction error.
    • :p_eff is the effective number of parameters – a model with a p_eff of 2 is "about as overfit" as a model with 2 parameters and no regularization.
  • pointwise::KeyedArray: A KeyedArray of pointwise estimates with 5 columns –
    • :cv_elpd contains the estimated out-of-sample error for this point, as measured
    using leave-one-out cross validation.
    • :naive_lpd contains the in-sample estimate of error for this point.
    • :p_eff is the difference in the two previous estimates.
    • :ess is the L2 effective sample size, which estimates the simulation error caused by using Monte Carlo estimates. It does not measure model performance.
    • :inf_ess is the supremum-based effective sample size, which estimates the simulation error caused by using Monte Carlo estimates. It is more robust than :ess and should therefore be preferred. It does not measure model performance.
    • :pareto_k is the estimated value for the parameter ξ of the generalized Pareto distribution. Values above .7 indicate that PSIS has failed to approximate the true distribution.
  • psis_object::Psis: A Psis object containing the results of Pareto-smoothed importance sampling.
  • gmpd: The geometric mean of the predictive density. It is defined as the geometric mean of the probability assigned to each data point by the model, i.e. exp(cv_avg). This measure is only interpretable for classifiers (variables with discrete outcomes). We can think of it as measuring how often the model was right: A model that always predicts incorrectly will have a GMPD of 0, while a model that always predicts correctly will have a GMPD of 1. However, the GMPD gives a model "Partial points" between 0 and 1 whenever the model assigns a probability other than 0 or 1 to the outcome that actually happened, making it a fully Bayesian measure of model quality.
  • mcse: A float containing the estimated Monte Carlo standard error for the total cross-validation estimate.

Extended help

The total score depends on the sample size, and summarizes the weight of evidence for or against a model. Total scores are on an interval scale, meaning that only differences of scores are meaningful. It is not possible to interpret a total score by looking at it. The total score is not a goodness-of-fit statistic (for this, see the average score).

The average score is the total score, divided by the sample size. It estimates the expected log score, i.e. the expectation of the log probability density of observing the next point. The average score is a relative goodness-of-fit statistic which does not depend on sample size.

Unlike for chi-square goodness of fit tests, models do not have to be nested for model comparison using cross-validation methods.

See also: [loo]@ref, [bayes_cv]@ref, [psis_loo]@ref, [Psis]@ref

source
ParetoSmooth.looMethod
function loo(args...; kwargs...) -> PsisLoo

Compute an approximate leave-one-out cross-validation score.

Currently, this function only serves to call psis_loo, but this could change in the future. The default methods or return type may change without warning, so we recommend using psis_loo instead if reproducibility is required.

See also: psis_loo, PsisLoo.

source
ParetoSmooth.loo_compareMethod
function loo_compare(
+Home · ParetoSmooth.jl

ParetoSmooth

Documentation for ParetoSmooth.

ParetoSmooth.ModelComparisonType
ModelComparison

A struct containing the results of model comparison.

Fields

  • pointwise::KeyedArray: A KeyedArray of pointwise estimates. See [PsisLoo]@ref.
    • estimates::KeyedArray: A table containing the results of model comparison, with the following columns –
      • cv_elpd: The difference in total leave-one-out cross validation scores between models.
      • cv_avg: The difference in average LOO-CV scores between models.
      • weight: A set of Akaike-like weights assigned to each model, which can be used in pseudo-Bayesian model averaging.
    • std_err::NamedTuple: A named tuple containing the standard error of cv_elpd. Note that these estimators (incorrectly) assume all folds are independent, despite their substantial overlap, which creates a downward biased estimator. LOO-CV differences are not asymptotically normal, so these standard errors cannot be used to calculate a confidence interval.
    • gmpd::NamedTuple: The geometric mean of the predictive distribution. It equals the geometric mean of the probability assigned to each data point by the model, that is, exp(cv_avg). This measure is only meaningful for classifiers (variables with discrete outcomes). We can think of it as measuring how often the model was right: A model that always predicts incorrectly will have a GMPD of 0, while a model that always predicts correctly will have a GMPD of 1. However, the GMPD gives a model "Partial points" between 0 and 1 whenever the model assigns a probability other than 0 or 1 to the outcome that actually happened.

See also: PsisLoo

source
ParetoSmooth.PsisType
Psis{R<:Real, AT<:AbstractArray{R, 3}, VT<:AbstractVector{R}}

A struct containing the results of Pareto-smoothed importance sampling.

Fields

  • weights: A vector of smoothed, truncated, and normalized importance sampling weights.
  • pareto_k: Estimates of the shape parameter k of the generalized Pareto distribution.
  • ess: Estimated effective sample size for each LOO evaluation, based on the variance of the weights.
  • sup_ess: Estimated effective sample size for each LOO evaluation, based on the supremum norm, i.e. the size of the largest weight. More likely than ess to warn when importance sampling has failed. However, it can have a high variance.
  • r_eff: The relative efficiency of the MCMC chain, i.e. ESS / posterior sample size.
  • tail_len: Vector indicating how large the "tail" is for each observation.
  • posterior_sample_size: How many draws from an MCMC chain were used for PSIS.
  • data_size: How many data points were used for PSIS.
source
ParetoSmooth.PsisLooType
PsisLoo <: AbstractCV

A struct containing the results of leave-one-out cross validation computed with Pareto smoothed importance sampling.

Fields

  • estimates::KeyedArray: A KeyedArray with columns :total, :se_total, :mean, :se_mean, and rows :cv_elpd, :naive_lpd, :p_eff. See # Extended help for more.
    • :cv_elpd contains estimates for the out-of-sample prediction error, as estimated using leave-one-out cross validation.
    • :naive_lpd contains estimates of the in-sample prediction error.
    • :p_eff is the effective number of parameters – a model with a p_eff of 2 is "about as overfit" as a model with 2 parameters and no regularization.
  • pointwise::KeyedArray: A KeyedArray of pointwise estimates with 5 columns –
    • :cv_elpd contains the estimated out-of-sample error for this point, as measured
    using leave-one-out cross validation.
    • :naive_lpd contains the in-sample estimate of error for this point.
    • :p_eff is the difference in the two previous estimates.
    • :ess is the L2 effective sample size, which estimates the simulation error caused by using Monte Carlo estimates. It does not measure model performance.
    • :inf_ess is the supremum-based effective sample size, which estimates the simulation error caused by using Monte Carlo estimates. It is more robust than :ess and should therefore be preferred. It does not measure model performance.
    • :pareto_k is the estimated value for the parameter ξ of the generalized Pareto distribution. Values above .7 indicate that PSIS has failed to approximate the true distribution.
  • psis_object::Psis: A Psis object containing the results of Pareto-smoothed importance sampling.
  • gmpd: The geometric mean of the predictive density. It is defined as the geometric mean of the probability assigned to each data point by the model, i.e. exp(cv_avg). This measure is only interpretable for classifiers (variables with discrete outcomes). We can think of it as measuring how often the model was right: A model that always predicts incorrectly will have a GMPD of 0, while a model that always predicts correctly will have a GMPD of 1. However, the GMPD gives a model "Partial points" between 0 and 1 whenever the model assigns a probability other than 0 or 1 to the outcome that actually happened, making it a fully Bayesian measure of model quality.
  • mcse: A float containing the estimated Monte Carlo standard error for the total cross-validation estimate.

Extended help

The total score depends on the sample size, and summarizes the weight of evidence for or against a model. Total scores are on an interval scale, meaning that only differences of scores are meaningful. It is not possible to interpret a total score by looking at it. The total score is not a goodness-of-fit statistic (for this, see the average score).

The average score is the total score, divided by the sample size. It estimates the expected log score, i.e. the expectation of the log probability density of observing the next point. The average score is a relative goodness-of-fit statistic which does not depend on sample size.

Unlike for chi-square goodness of fit tests, models do not have to be nested for model comparison using cross-validation methods.

See also: [loo]@ref, [bayes_cv]@ref, [psis_loo]@ref, [Psis]@ref

source
ParetoSmooth.looMethod
function loo(args...; kwargs...) -> PsisLoo

Compute an approximate leave-one-out cross-validation score.

Currently, this function only serves to call psis_loo, but this could change in the future. The default methods or return type may change without warning, so we recommend using psis_loo instead if reproducibility is required.

See also: psis_loo, PsisLoo.

source
ParetoSmooth.loo_compareMethod
function loo_compare(
     cv_results...;
     sort_models::Bool=true,
     best_to_worst::Bool=true,
     [, model_names::Tuple{Symbol}]
-) -> ModelComparison

Construct a model comparison table from several PsisLoo objects.

Arguments

  • cv_results: One or more PsisLoo objects to be compared. Alternatively, a tuple or named tuple of PsisLoo objects can be passed. If a named tuple is passed, these names will be used to label each model.
  • model_names: A vector or tuple of strings or symbols used to identify models. If none, models are numbered using the order of the arguments.
  • sort_models: Sort models by total score.
  • high_to_low: Sort models from best to worst score. If false, reverse the order.

See also: ModelComparison, PsisLoo, psis_loo

source
ParetoSmooth.loo_from_psisMethod
loo_from_psis(
+) -> ModelComparison

Construct a model comparison table from several PsisLoo objects.

Arguments

  • cv_results: One or more PsisLoo objects to be compared. Alternatively, a tuple or named tuple of PsisLoo objects can be passed. If a named tuple is passed, these names will be used to label each model.
  • model_names: A vector or tuple of strings or symbols used to identify models. If none, models are numbered using the order of the arguments.
  • sort_models: Sort models by total score.
  • high_to_low: Sort models from best to worst score. If false, reverse the order.

See also: ModelComparison, PsisLoo, psis_loo

source
ParetoSmooth.loo_from_psisMethod
loo_from_psis(
     log_likelihood::AbstractArray{<:Real}, psis_object::Psis; 
     chain_index::Vector{<:Integer}
-)

Use a precalculated Psis object to estimate the leave-one-out cross validation score.

Arguments

  • log_likelihood::Array: A matrix or 3d array of log-likelihood values indexed as

[data, step, chain]. The chain argument can be left off if chain_index is provided or if all posterior samples were drawn from a single chain.

  • psis_object: A precomputed Psis object used to estimate the LOO-CV score.
  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

See also: psis, loo, PsisLoo.

source
ParetoSmooth.pointwise_log_likelihoodsMethod
pointwise_log_likelihoods(
+)

Use a precalculated Psis object to estimate the leave-one-out cross validation score.

Arguments

  • log_likelihood::Array: A matrix or 3d array of log-likelihood values indexed as

[data, step, chain]. The chain argument can be left off if chain_index is provided or if all posterior samples were drawn from a single chain.

  • psis_object: A precomputed Psis object used to estimate the LOO-CV score.
  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

See also: psis, loo, PsisLoo.

source
ParetoSmooth.pointwise_log_likelihoodsMethod
pointwise_log_likelihoods(
     ll_fun::Function, samples::AbstractArray{<:Real,3}, data;
     splat::Bool=true[, chain_index::Vector{<:Integer}]
-)

Compute the pointwise log likelihoods.

Arguments

  • ll_fun::Function: A function taking a single data point and returning the log-likelihood

of that point. This function must take the form f(θ[1], ..., θ[n], data), where θ is the parameter vector. See also the splat keyword argument.

  • samples::AbstractArray: A three dimensional array of MCMC samples. Here, the first dimension should indicate the step of the MCMC algorithm; the second dimension should indicate the parameter; and the third should indicate the chain.

  • data: An array of data points used to estimate the parameters of the model.

  • splat: If true (default), f must be a function of n different parameters. Otherwise, f is assumed to be a function of a single parameter vector.

  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

Returns

  • Array: A three dimensional array of pointwise log-likelihoods.
source
ParetoSmooth.psis!Method
psis!(
+)

Compute the pointwise log likelihoods.

Arguments

  • ll_fun::Function: A function taking a single data point and returning the log-likelihood

of that point. This function must take the form f(θ[1], ..., θ[n], data), where θ is the parameter vector. See also the splat keyword argument.

  • samples::AbstractArray: A three dimensional array of MCMC samples. Here, the first dimension should indicate the step of the MCMC algorithm; the second dimension should indicate the parameter; and the third should indicate the chain.

  • data: An array of data points used to estimate the parameters of the model.

  • splat: If true (default), f must be a function of n different parameters. Otherwise, f is assumed to be a function of a single parameter vector.

  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

Returns

  • Array: A three dimensional array of pointwise log-likelihoods.
source
ParetoSmooth.psis!Method
psis!(
     is_ratios::AbstractVector{<:Real}; 
     tail_length::Integer, log_weights::Bool=true
-) -> Real

Do PSIS on a single vector, smoothing its tail values in place before returning the estimated shape constant for the pareto_k distribution. This does not normalize the log-weights.

Arguments

  • is_ratios::AbstractVector{<:Real}: A vector of importance sampling ratios, scaled to have a maximum of 1.
  • r_eff::AbstractVector{<:Real}: The relative effective sample size, used to calculate the effective sample size. See [rel_eff]@ref for more information.
  • log_weights::Bool: A boolean indicating whether the input vector is a vector of log ratios, rather than raw importance sampling ratios.

Returns

  • Real: ξ, the shape parameter for the GPD. Bigger numbers indicate thicker tails.

Notes

Unlike the methods for arrays, psis! performs no checks to make sure the input values are valid.

source
ParetoSmooth.psisMethod
psis(
+) -> Real

Do PSIS on a single vector, smoothing its tail values in place before returning the estimated shape constant for the pareto_k distribution. This does not normalize the log-weights.

Arguments

  • is_ratios::AbstractVector{<:Real}: A vector of importance sampling ratios, scaled to have a maximum of 1.
  • r_eff::AbstractVector{<:Real}: The relative effective sample size, used to calculate the effective sample size. See [rel_eff]@ref for more information.
  • log_weights::Bool: A boolean indicating whether the input vector is a vector of log ratios, rather than raw importance sampling ratios.

Returns

  • Real: ξ, the shape parameter for the GPD. Bigger numbers indicate thicker tails.

Notes

Unlike the methods for arrays, psis! performs no checks to make sure the input values are valid.

source
ParetoSmooth.psisMethod
psis(
     log_ratios::AbstractArray{T<:Real}, 
     r_eff::AbstractVector{T}; 
     source::String="mcmc"    
-) -> Psis

Implements Pareto-smoothed importance sampling (PSIS).

Arguments

Positional Arguments

  • log_ratios::AbstractArray: A 2d or 3d array of (unnormalized) importance ratios on the log scale. Indices must be ordered as [data, step, chain]. The chain index can be left off if there is only one chain, or if keyword argument chain_index is provided.
  • r_eff::AbstractVector: An (optional) vector of relative effective sample sizes used in ESS

calculations. If left empty, calculated automatically using the FFTESS method from InferenceDiagnostics.jl. See relative_eff to calculate these values.

Keyword Arguments

  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

  • source::String="mcmc": A string or symbol describing the source of the sample being used. If "mcmc", adjusts ESS for autocorrelation. Otherwise, samples are assumed to be independent. Currently permitted values are ["mcmc", "vi", "other"].
  • calc_ess::Bool=true: If false, do not calculate ESS diagnostics. Attempting to access ESS diagnostics will return an empty array.
  • checks::Bool=true: If true, check inputs for possible errors. Disabling will improve performance slightly.

See also: [relative_eff]@ref, [psis_loo]@ref, [psis_ess]@ref.

source
ParetoSmooth.psis_essMethod
function psis_ess(
+) -> Psis

Implements Pareto-smoothed importance sampling (PSIS).

Arguments

Positional Arguments

  • log_ratios::AbstractArray: A 2d or 3d array of (unnormalized) importance ratios on the log scale. Indices must be ordered as [data, step, chain]. The chain index can be left off if there is only one chain, or if keyword argument chain_index is provided.
  • r_eff::AbstractVector: An (optional) vector of relative effective sample sizes used in ESS

calculations. If left empty, calculated automatically using the FFTESS method from InferenceDiagnostics.jl. See relative_eff to calculate these values.

Keyword Arguments

  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

  • source::String="mcmc": A string or symbol describing the source of the sample being used. If "mcmc", adjusts ESS for autocorrelation. Otherwise, samples are assumed to be independent. Currently permitted values are ["mcmc", "vi", "other"].
  • calc_ess::Bool=true: If false, do not calculate ESS diagnostics. Attempting to access ESS diagnostics will return an empty array.
  • checks::Bool=true: If true, check inputs for possible errors. Disabling will improve performance slightly.

See also: [relative_eff]@ref, [psis_loo]@ref, [psis_ess]@ref.

source
ParetoSmooth.psis_essMethod
function psis_ess(
     weights::AbstractVector{T<:Real},
     r_eff::AbstractVector{T}
-) -> AbstractVector{T}

Calculate the (approximate) effective sample size of a PSIS sample, using the correction in Vehtari et al. 2019. This uses the entropy-based definition of ESS, measuring the K-L divergence of the proposal and target distributions.

Arguments

  • weights: A set of normalized importance sampling weights derived from PSIS.
  • r_eff: The relative efficiency of the MCMC chains from which PSIS samples were derived.

See ?relative_eff to calculate r_eff.

source
ParetoSmooth.psis_looMethod
function psis_loo(
+) -> AbstractVector{T}

Calculate the (approximate) effective sample size of a PSIS sample, using the correction in Vehtari et al. 2019. This uses the entropy-based definition of ESS, measuring the K-L divergence of the proposal and target distributions.

Arguments

  • weights: A set of normalized importance sampling weights derived from PSIS.
  • r_eff: The relative efficiency of the MCMC chains from which PSIS samples were derived.

See ?relative_eff to calculate r_eff.

source
ParetoSmooth.psis_looMethod
function psis_loo(
     log_likelihood::AbstractArray{<:Real} [, args...];
     [, chain_index::Vector{Int}, kwargs...]
-) -> PsisLoo

Use Pareto-Smoothed Importance Sampling to calculate the leave-one-out cross validation score.

Arguments

  • log_likelihood::Array: A matrix or 3d array of log-likelihood values indexed as

[data, step, chain]. The chain argument can be left off if chain_index is provided or if all posterior samples were drawn from a single chain.

  • args...: Positional arguments to be passed to psis.
  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

  • kwargs...: Keyword arguments to be passed to psis.

See also: psis, loo, PsisLoo.

source
ParetoSmooth.relative_effMethod
relative_eff(
+) -> PsisLoo

Use Pareto-Smoothed Importance Sampling to calculate the leave-one-out cross validation score.

Arguments

  • log_likelihood::Array: A matrix or 3d array of log-likelihood values indexed as

[data, step, chain]. The chain argument can be left off if chain_index is provided or if all posterior samples were drawn from a single chain.

  • args...: Positional arguments to be passed to psis.
  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

  • kwargs...: Keyword arguments to be passed to psis.

See also: psis, loo, PsisLoo.

source
ParetoSmooth.relative_effMethod
relative_eff(
     sample::AbstractArray{<:Real, 3};
     source::Union{AbstractString, Symbol} = "default",
     maxlag::Int = typemax(Int),
     kwargs..., 
-)

Calculate the relative efficiency of an MCMC chain, i.e., the effective sample size divided by the nominal sample size.

If lowercase(String(source)) is "default" or "mcmc", the relative effective sample size is computed with MCMCDiagnosticTools.ess, using keyword arguments kind = :basic, maxlag = maxlag, and the remaining keyword arguments kwargs.... Otherwise a vector of ones for each chain is returned.

Arguments

  • sample::AbstractArray{<:Real, 3}: An array of log-likelihood values of the shape (parameters, draws, chains).
source
ParetoSmooth.sup_essMethod
function sup_ess(
+)

Calculate the relative efficiency of an MCMC chain, i.e., the effective sample size divided by the nominal sample size.

If lowercase(String(source)) is "default" or "mcmc", the relative effective sample size is computed with MCMCDiagnosticTools.ess, using keyword arguments kind = :basic, maxlag = maxlag, and the remaining keyword arguments kwargs.... Otherwise a vector of ones for each chain is returned.

Arguments

  • sample::AbstractArray{<:Real, 3}: An array of log-likelihood values of the shape (parameters, draws, chains).
source
ParetoSmooth.sup_essMethod
function sup_ess(
     weights::AbstractMatrix{T},
     r_eff::AbstractVector{T}
-) -> AbstractVector

Calculate the supremum-based effective sample size of a PSIS sample, i.e. the inverse of the maximum weight. This measure is more sensitive than the ess from psis_ess, but also much more variable. It uses the L-∞ norm.

Arguments

  • weights: A set of importance sampling weights derived from PSIS.
  • r_eff: The relative efficiency of the MCMC chains; see also [relative_eff]@ref.
source
ParetoSmooth.naive_lpdFunction
naive_lpd(log_likelihood::AbstractArray{<:Real}[, chain_index])

Calculate the naive (in-sample) estimate of the expected log probability density, otherwise known as the in-sample Bayes score. This method yields heavily biased results, and we advise against using it; it is included only for pedagogical purposes.

This method is unexported and can only be accessed by calling ParetoSmooth.naive_lpd.

Arguments

  • log_likelihood::Array: A matrix or 3d array of log-likelihood values indexed as

[data, step, chain]. The chain argument can be left off if chain_index is provided or if all posterior samples were drawn from a single chain.

  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

source
- +) -> AbstractVector

Calculate the supremum-based effective sample size of a PSIS sample, i.e. the inverse of the maximum weight. This measure is more sensitive than the ess from psis_ess, but also much more variable. It uses the L-∞ norm.

Arguments

  • weights: A set of importance sampling weights derived from PSIS.
  • r_eff: The relative efficiency of the MCMC chains; see also [relative_eff]@ref.
source
ParetoSmooth.naive_lpdFunction
naive_lpd(log_likelihood::AbstractArray{<:Real}[, chain_index])

Calculate the naive (in-sample) estimate of the expected log probability density, otherwise known as the in-sample Bayes score. This method yields heavily biased results, and we advise against using it; it is included only for pedagogical purposes.

This method is unexported and can only be accessed by calling ParetoSmooth.naive_lpd.

Arguments

  • log_likelihood::Array: A matrix or 3d array of log-likelihood values indexed as

[data, step, chain]. The chain argument can be left off if chain_index is provided or if all posterior samples were drawn from a single chain.

  • chain_index::Vector{Int}: An optional vector of integers specifying which chain each step

belongs to. For instance, chain_index[step] should return 2 if log_likelihood[:, step] belongs to the second chain.

source
diff --git a/previews/PR100/objects.inv b/previews/PR100/objects.inv index 5e3d489..69b7808 100644 Binary files a/previews/PR100/objects.inv and b/previews/PR100/objects.inv differ diff --git a/previews/PR100/turing/index.html b/previews/PR100/turing/index.html index dbf342b..12529bb 100644 --- a/previews/PR100/turing/index.html +++ b/previews/PR100/turing/index.html @@ -1,464 +1,5 @@ -Using with Turing · ParetoSmooth.jl - - - - - - -

Turing Example

This example demonstrates how to correctly compute PSIS LOO for a model developed with Turing.jl. Below, we show two ways to correctly specify the model in Turing. What is most important is to specify the model so that pointwise log densities are computed for each observation.

To make things simple, we will use a Gaussian model in each example. Suppose observations $Y = \{y_1,y_2,\dots y_n\}$ come from a Gaussian distribution with an unknown parameter $\mu$ and known parameter $\sigma=1$. The model can be stated as follows:

$\mu \sim \mathrm{normal}(0, 1)$

$Y \sim \mathrm{Normal}(\mu, 1)$

For Loop Method

One way to specify a model to correctly compute PSIS LOO is to iterate over the observations using a for loop, as follows:

using Turing
+Using with Turing · ParetoSmooth.jl

Turing Example

This example demonstrates how to correctly compute PSIS LOO for a model developed with Turing.jl. Below, we show two ways to correctly specify the model in Turing. What is most important is to specify the model so that pointwise log densities are computed for each observation.

To make things simple, we will use a Gaussian model in each example. Suppose observations $Y = \{y_1,y_2,\dots y_n\}$ come from a Gaussian distribution with an unknown parameter $\mu$ and known parameter $\sigma=1$. The model can be stated as follows:

$\mu \sim \mathrm{normal}(0, 1)$

$Y \sim \mathrm{Normal}(\mu, 1)$

For Loop Method

One way to specify a model to correctly compute PSIS LOO is to iterate over the observations using a for loop, as follows:

using Turing
 using ParetoSmooth
 using Distributions
 using Random
@@ -531,5 +72,4 @@
 │   cv_elpd │ -158.57 │      NaN │ -158.57 │     NaN │
 │ naive_lpd │ -157.91 │      NaN │ -157.91 │     NaN │
 │     p_eff │    0.66 │      NaN │    0.66 │     NaN │
-└───────────┴─────────┴──────────┴─────────┴─────────┘
- +└───────────┴─────────┴──────────┴─────────┴─────────┘