diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4f44929..19fc34e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,7 +15,7 @@ env: # Our tests may contain a number of stochastic elements. Setting a seed will make sure they are # not flaky (but also hide potential issues). SEED: "0" - cmdstanVersion: "2.31.0" + cmdstanVersion: "2.36.0" jobs: build: diff --git a/stan/docs/interface/fft.rst b/stan/docs/interface/fft.rst index f17cdda..1ec0c90 100644 --- a/stan/docs/interface/fft.rst +++ b/stan/docs/interface/fft.rst @@ -9,7 +9,7 @@ Utility Functions The following functions, and their two-dimensional analogues, primarily exist as utility functions but may be useful for more complex models. - :stan:func:`gp_rfft` transforms Gaussian process realizations to the Fourier domain and scales the coefficients such that they are white noise under the Gaussian process prior. -- :stan:func:`gp_rfft_log_abs_det_jacobian` evaluates the log absolute determinant of the Jacobian associated with the transformations :stan:func:`gp_rfft`. +- :stan:func:`gp_rfft_log_abs_det_jac` evaluates the log absolute determinant of the Jacobian associated with the transformations :stan:func:`gp_rfft`. Together, these two functions are used by :stan:func:`gp_rfft_lpdf` to evaluate the likelihood. diff --git a/stan/gptools/stan/__init__.py b/stan/gptools/stan/__init__.py index 33406dc..1cee65b 100644 --- a/stan/gptools/stan/__init__.py +++ b/stan/gptools/stan/__init__.py @@ -9,6 +9,17 @@ def get_include() -> str: """ Get the include directory for the library. """ + version = cmdstanpy.cmdstan_version() + if version and version < (2, 36): # pragma: no cover + logging.warning( + "cmdstan<2.36.0 had a bug in the evaluation of Matern 3/2 kernels " + "with different length scales for different dimensions; see " + "https://github.com/stan-dev/math/pull/3084 for details. Your " + "model may yield unexpected results or crash if you use " + "nearest-neighbor Gaussian processes with Matern 3/2 kernels. Your " + "cmdstan version is %d.%d; consider upgrading.", + *version, + ) return os.path.dirname(__file__) diff --git a/stan/gptools/stan/gptools/fft1.stan b/stan/gptools/stan/gptools/fft1.stan index 66710ab..7333564 100644 --- a/stan/gptools/stan/gptools/fft1.stan +++ b/stan/gptools/stan/gptools/fft1.stan @@ -75,7 +75,7 @@ Evaluate the log absolute determinant of the Jacobian associated with :param n: Size of the real signal. Necessary because the size cannot be inferred from :code:`rfft`. :returns: Log absolute determinant of the Jacobian. */ -real gp_rfft_log_abs_det_jacobian(vector cov_rfft, int n) { +real gp_rfft_log_abs_det_jac(vector cov_rfft, int n) { vector[n %/% 2 + 1] rfft_scale = gp_evaluate_rfft_scale(cov_rfft, n); return - sum(log(rfft_scale[1:n %/% 2 + 1])) -sum(log(rfft_scale[2:(n + 1) %/% 2])) - log(2) * ((n - 1) %/% 2) + n * log(n) / 2; @@ -94,7 +94,7 @@ real gp_rfft_lpdf(vector y, vector loc, vector cov_rfft) { int n = size(y); int nrfft = n %/% 2 + 1; vector[n] z = gp_rfft(y, loc, cov_rfft); - return std_normal_lpdf(z) + gp_rfft_log_abs_det_jacobian(cov_rfft, n); + return std_normal_lpdf(z) + gp_rfft_log_abs_det_jac(cov_rfft, n); } diff --git a/stan/gptools/stan/gptools/fft2.stan b/stan/gptools/stan/gptools/fft2.stan index 9b810fc..d698444 100644 --- a/stan/gptools/stan/gptools/fft2.stan +++ b/stan/gptools/stan/gptools/fft2.stan @@ -140,7 +140,7 @@ Evaluate the log absolute determinant of the Jacobian associated with Fourier transform :code:`cov_rfft2`). :returns: Log absolute determinant of the Jacobian. */ -real gp_rfft2_log_abs_det_jacobian(matrix cov_rfft2, int width) { +real gp_rfft2_log_abs_det_jac(matrix cov_rfft2, int width) { int height = rows(cov_rfft2); int n = width * height; int fftwidth = width %/% 2 + 1; @@ -185,7 +185,7 @@ Evaluate the log probability of a two-dimensional Gaussian process realization i */ real gp_rfft2_lpdf(matrix y, matrix loc, matrix cov_rfft2) { return std_normal_lpdf(to_vector(gp_rfft2(y, loc, cov_rfft2))) - + gp_rfft2_log_abs_det_jacobian(cov_rfft2, cols(y)); + + gp_rfft2_log_abs_det_jac(cov_rfft2, cols(y)); } diff --git a/stan/gptools/stan/gptools/graph.stan b/stan/gptools/stan/gptools/graph.stan index ae98e58..8faf9ea 100644 --- a/stan/gptools/stan/gptools/graph.stan +++ b/stan/gptools/stan/gptools/graph.stan @@ -44,10 +44,17 @@ Evaluate the location and scale for a node given its predecessors, assuming zero :param epsilon: Nugget variance for numerical stability. :returns: Location and scale parameters for the distribution of the node given its predecessors. */ -vector gp_graph_conditional_loc_scale(vector y, array[] vector x, int kernel, real sigma, - real length_scale, int node, array [] int predecessors, - real epsilon) { +vector gp_graph_conditional_loc_scale( + vector y, array[] vector x, int kernel, real sigma, array [] real length_scale, + int node, array [] int predecessors, real epsilon +) { int k = size(predecessors); + + // If there are no predecessors, we simply use the marginal distribution. + if (k == 0) { + return [0, sqrt(sigma ^ 2 + epsilon)]'; + } + matrix[1, k] cov12; matrix[k, k] cov22; if (kernel == 0) { @@ -85,7 +92,8 @@ Evaluate the log probability of a graph Gaussian process. :returns: Log probability of the graph Gaussian process. */ real gp_graph_lpdf(vector y, vector loc, array [] vector x, int kernel, real sigma, - real length_scale, array [,] int edges, array[] int degrees, real epsilon) { + array [] real length_scale, array [,] int edges, array[] int degrees, real epsilon +) { real lpdf = 0; int offset_ = 1; vector[size(y)] z = y - loc; @@ -99,6 +107,39 @@ real gp_graph_lpdf(vector y, vector loc, array [] vector x, int kernel, real sig } +/** +Evaluate the log probability of a graph Gaussian process. + +:param y: State of each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param kernel: Kernel to use (0 for squared exponential, 1 for Matern 3/2, 2 for Matern 5/2). +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:param degrees: Out-degree of each node. +:param epsilon: Nugget variance for numerical stability. +:returns: Log probability of the graph Gaussian process. +*/ +real gp_graph_lpdf( + vector y, + vector loc, + array [] vector x, + int kernel, + real sigma, + real length_scale, + array [,] int edges, + array[] int degrees, + real epsilon +) { + int p = size(x[1]); + return gp_graph_lpdf(y | loc, x, kernel, sigma, rep_array(length_scale, p), edges, degrees, epsilon); +} + + /** Evaluate the log probability of a graph Gaussian process. @@ -121,6 +162,28 @@ real gp_graph_lpdf(vector y, vector loc, array [] vector x, int kernel, real sig } +/** +Evaluate the log probability of a graph Gaussian process. + +:param y: State of each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param kernel: Kernel to use (0 for squared exponential, 1 for Matern 3/2, 2 for Matern 5/2). +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:returns: Log probability of the graph Gaussian process. +*/ +real gp_graph_lpdf(vector y, vector loc, array [] vector x, int kernel, real sigma, + array [] real length_scale, array [,] int edges) { + return gp_graph_lpdf(y | loc, x, kernel, sigma, length_scale, edges, + out_degrees(size(y), edges), 1e-12); +} + + /** Transform white noise to a sample from a graph Gaussian process @@ -129,7 +192,7 @@ Transform white noise to a sample from a graph Gaussian process :param x: Position of each node. :param kernel: Kernel to use (0 for squared exponential, 1 for Matern 3/2, 2 for Matern 5/2). :param sigma: Marginal scale of the kernel. -:param length_scale: Correlation length of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. :param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row comprises parents of children in the second row. The first row can have arbitrary order, but the @@ -139,7 +202,8 @@ Transform white noise to a sample from a graph Gaussian process :returns: Sample from the Graph gaussian process. */ vector gp_inv_graph(vector z, vector loc, array [] vector x, int kernel, real sigma, - real length_scale, array [,] int edges, array [] int degrees, real epsilon) { + array [] real length_scale, array [,] int edges, array [] int degrees, real epsilon +) { vector[size(z)] y; int offset_ = 1; for (i in 1:size(x)) { @@ -165,14 +229,62 @@ Transform white noise to a sample from a graph Gaussian process as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row comprises parents of children in the second row. The first row can have arbitrary order, but the second row must be sorted. +:param degrees: Out-degree of each node. +:param epsilon: Nugget variance for numerical stability. :returns: Sample from the Graph gaussian process. */ vector gp_inv_graph(vector z, vector loc, array [] vector x, int kernel, real sigma, - real length_scale, array [,] int edges) { + real length_scale, array [,] int edges, array [] int degrees, real epsilon +) { + int p = size(x[1]); + return gp_inv_graph(z, loc, x, kernel, sigma, rep_array(length_scale, p), edges, degrees, epsilon); +} + + +/** +Transform white noise to a sample from a graph Gaussian process + +:param z: White noise for each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param kernel: Kernel to use (0 for squared exponential, 1 for Matern 3/2, 2 for Matern 5/2). +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:returns: Sample from the Graph gaussian process. +*/ +vector gp_inv_graph(vector z, vector loc, array [] vector x, int kernel, real sigma, + array [] real length_scale, array [,] int edges) { return gp_inv_graph(z, loc, x, kernel, sigma, length_scale, edges, out_degrees(size(z), edges), 1e-12); } + +/** +Transform white noise to a sample from a graph Gaussian process + +:param z: White noise for each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param kernel: Kernel to use (0 for squared exponential, 1 for Matern 3/2, 2 for Matern 5/2). +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:returns: Sample from the Graph gaussian process. +*/ +vector gp_inv_graph(vector z, vector loc, array [] vector x, int kernel, real sigma, + real length_scale, array [,] int edges) { + int p = size(x[1]); + return gp_inv_graph(z, loc, x, kernel, sigma, rep_array(length_scale, p), edges, + out_degrees(size(z), edges), 1e-12); +} + // Functions for graph Gaussian processes with squared exponential kernel -------------------------- /** @@ -198,6 +310,29 @@ real gp_graph_exp_quad_cov_lpdf(vector y, vector loc, array [] vector x, real si } +/** +Evaluate the log probability of a graph Gaussian with squared exponential kernel. + +:param y: State of each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:param degrees: Out-degree of each node. +:param epsilon: Nugget variance for numerical stability. +:returns: Log probability of the graph Gaussian process. +*/ +real gp_graph_exp_quad_cov_lpdf(vector y, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges, array[] int degrees, real epsilon +) { + return gp_graph_lpdf(y | loc, x, 0, sigma, length_scale, edges, degrees, epsilon); +} + + /** Evaluate the log probability of a graph Gaussian with squared exponential kernel. @@ -237,6 +372,32 @@ Transform white noise to a sample from a graph Gaussian process with squared exp vector gp_inv_graph_exp_quad_cov(vector z, vector loc, array [] vector x, real sigma, real length_scale, array [,] int edges, array [] int degrees, real epsilon) { + int p = size(x[1]); + return gp_inv_graph(z, loc, x, 0, sigma, rep_array(length_scale, p), edges, degrees, epsilon); +} + + +/** +Transform white noise to a sample from a graph Gaussian process with squared +exponential kernel and different length scales along each feature dimension. + +:param z: White noise for each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation lengths of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic + graph. Edges are stored as a matrix with shape :code:`(2, m)`, where + :code:`m` is the number of edges. The first row comprises parents of + children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:param degrees: Out-degree of each node. +:param epsilon: Nugget variance for numerical stability. +:returns: Sample from the Graph gaussian process. +*/ +vector gp_inv_graph_exp_quad_cov(vector z, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges, array [] int degrees, real epsilon +) { return gp_inv_graph(z, loc, x, 0, sigma, length_scale, edges, degrees, epsilon); } @@ -261,6 +422,26 @@ vector gp_inv_graph_exp_quad_cov(vector z, vector loc, array [] vector x, real s } +/** +Transform white noise to a sample from a graph Gaussian process with squared exponential kernel. + +:param z: White noise for each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:returns: Sample from the Graph gaussian process. +*/ +vector gp_inv_graph_exp_quad_cov(vector z, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges) { + return gp_inv_graph(z, loc, x, 0, sigma, length_scale, edges); +} + + // Functions for graph Gaussian processes with Matern 3 / 2 kernel --------------------------------- /** @@ -306,6 +487,49 @@ real gp_graph_matern32_cov_lpdf(vector y, vector loc, array [] vector x, real si } +/** +Evaluate the log probability of a graph Gaussian with Matern 3 / 2 kernel. + +:param y: State of each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:param degrees: Out-degree of each node. +:param epsilon: Nugget variance for numerical stability. +:returns: Log probability of the graph Gaussian process. +*/ +real gp_graph_matern32_cov_lpdf(vector y, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges, array[] int degrees, + real epsilon) { + return gp_graph_lpdf(y | loc, x, 1, sigma, length_scale, edges, degrees, epsilon); +} + + +/** +Evaluate the log probability of a graph Gaussian with Matern 3 / 2 kernel. + +:param y: State of each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:returns: Log probability of the graph Gaussian process. +*/ +real gp_graph_matern32_cov_lpdf(vector y, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges) { + return gp_graph_lpdf(y | loc, x, 1, sigma, length_scale, edges); +} + + /** Transform white noise to a sample from a graph Gaussian process with Matern 3 / 2 kernel. @@ -349,6 +573,49 @@ vector gp_inv_graph_matern32_cov(vector z, vector loc, array [] vector x, real s } +/** +Transform white noise to a sample from a graph Gaussian process with Matern 3 / 2 kernel. + +:param z: White noise for each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:param degrees: Out-degree of each node. +:param epsilon: Nugget variance for numerical stability. +:returns: Sample from the Graph gaussian process. +*/ +vector gp_inv_graph_matern32_cov(vector z, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges, array [] int degrees, + real epsilon) { + return gp_inv_graph(z, loc, x, 1, sigma, length_scale, edges, degrees, epsilon); +} + + +/** +Transform white noise to a sample from a graph Gaussian process with Matern 3 / 2 kernel. + +:param z: White noise for each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:returns: Sample from the Graph gaussian process. +*/ +vector gp_inv_graph_matern32_cov(vector z, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges) { + return gp_inv_graph(z, loc, x, 1, sigma, length_scale, edges); +} + + // Functions for graph Gaussian processes with Matern 5 / 2 kernel --------------------------------- /** @@ -394,6 +661,49 @@ real gp_graph_matern52_cov_lpdf(vector y, vector loc, array [] vector x, real si } +/** +Evaluate the log probability of a graph Gaussian with Matern 5 / 2 kernel. + +:param y: State of each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:param degrees: Out-degree of each node. +:param epsilon: Nugget variance for numerical stability. +:returns: Log probability of the graph Gaussian process. +*/ +real gp_graph_matern52_cov_lpdf(vector y, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges, array[] int degrees, + real epsilon) { + return gp_graph_lpdf(y | loc, x, 2, sigma, length_scale, edges, degrees, epsilon); +} + + +/** +Evaluate the log probability of a graph Gaussian with Matern 5 / 2 kernel. + +:param y: State of each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:returns: Log probability of the graph Gaussian process. +*/ +real gp_graph_matern52_cov_lpdf(vector y, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges) { + return gp_graph_lpdf(y | loc, x, 2, sigma, length_scale, edges); +} + + /** Transform white noise to a sample from a graph Gaussian process with Matern 5 / 2 kernel. @@ -435,3 +745,46 @@ vector gp_inv_graph_matern52_cov(vector z, vector loc, array [] vector x, real s real length_scale, array [,] int edges) { return gp_inv_graph(z, loc, x, 2, sigma, length_scale, edges); } + + +/** +Transform white noise to a sample from a graph Gaussian process with Matern 5 / 2 kernel. + +:param z: White noise for each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:param degrees: Out-degree of each node. +:param epsilon: Nugget variance for numerical stability. +:returns: Sample from the Graph gaussian process. +*/ +vector gp_inv_graph_matern52_cov(vector z, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges, array [] int degrees, + real epsilon) { + return gp_inv_graph(z, loc, x, 2, sigma, length_scale, edges, degrees, epsilon); +} + + +/** +Transform white noise to a sample from a graph Gaussian process with Matern 5 / 2 kernel. + +:param z: White noise for each node. +:param loc: Mean of each node. +:param x: Position of each node. +:param sigma: Marginal scale of the kernel. +:param length_scale: Correlation length of the kernel for each dimension. +:param edges: Directed edges between nodes constituting a directed acyclic graph. Edges are stored + as a matrix with shape :code:`(2, m)`, where :code:`m` is the number of edges. The first row + comprises parents of children in the second row. The first row can have arbitrary order, but the + second row must be sorted. +:returns: Sample from the Graph gaussian process. +*/ +vector gp_inv_graph_matern52_cov(vector z, vector loc, array [] vector x, real sigma, + array [] real length_scale, array [,] int edges) { + return gp_inv_graph(z, loc, x, 2, sigma, length_scale, edges); +} diff --git a/stan/tests/test_stan_functions.py b/stan/tests/test_stan_functions.py index b0182c2..23e051c 100644 --- a/stan/tests/test_stan_functions.py +++ b/stan/tests/test_stan_functions.py @@ -2,7 +2,6 @@ from gptools.util import coordgrid, fft, kernels import hashlib import inspect -import itertools as it import numpy as np import pathlib import pytest @@ -74,20 +73,22 @@ def assert_stan_function_allclose( raise RuntimeError(f"failed to compile model for {stan_function} at {line_info}") from ex try: - fit = model.sample(arg_values, fixed_param=True, iter_sampling=1, iter_warmup=1, sig_figs=9) - success = fit.stan_variable("success")[0] == 1 - if not success: - raise RuntimeError("failed to sample from model") + fit = model.sample(arg_values, fixed_param=True, iter_sampling=1, iter_warmup=1, sig_figs=9, + chains=1) + success, = fit.stan_variable("success") + if not success or np.isnan(success): + console = pathlib.Path(fit.runset.stdout_files[0]).read_text() + raise RuntimeError(f"failed to sample from model: \n{console}") except Exception as ex: if raises: return raise RuntimeError(f"failed to get Stan result for {stan_function} at {line_info}") from ex if raises: - raise RuntimeError("Stan sampling did not raise an error") + raise RuntimeError(f"sampling did not raise an error for {stan_function} at {line_info}") # Skip validation if we don't have a result type. - if not result_type: + if not result_type or desired is None: return # Verify against expected value. We only check one because we have already verified that they # are the same. @@ -357,50 +358,99 @@ def assert_stan_function_allclose( # Add evaluation of likelihood on a complete graph to check values. n = 10 -for p, kernel in it.product([1, 2], [ - kernels.ExpQuadKernel(1.3, 0.7), - kernels.MaternKernel(1.5, 1.3, 0.7), - kernels.MaternKernel(2.5, 1.3, 0.7), - ]): - x = np.random.normal(0, 1, (n, p)) - epsilon = 1e-3 - loc = np.random.normal(0, 1, n) - cov = kernel.evaluate(x) + epsilon * np.eye(n) - dist = stats.multivariate_normal(loc, cov) - y = dist.rvs() - # Construct a complete graph. - edges = [] - for i in range(1, n): - edges.append(np.transpose([np.roll(np.arange(i), 1), np.ones(i) * i])) - edges = np.concatenate(edges, axis=0).astype(int).T - if isinstance(kernel, kernels.ExpQuadKernel): - stan_function = "gp_graph_exp_quad_cov_lpdf" - elif isinstance(kernel, kernels.MaternKernel): - if kernel.dof == 1.5: - stan_function = "gp_graph_matern32_cov_lpdf" - elif kernel.dof == 2.5: - stan_function = "gp_graph_matern52_cov_lpdf" +for p in [1, 2]: + for kernel in [ + kernels.ExpQuadKernel(1.3, 0.7 * np.ones(p).squeeze()), + kernels.MaternKernel(1.5, 1.3, 0.7 * np.ones(p).squeeze()), + kernels.MaternKernel(2.5, 1.3, 0.7 * np.ones(p).squeeze()), + ]: + x = np.random.normal(0, 1, (n, p)) + epsilon = 1e-3 + loc = np.random.normal(0, 1, n) + cov = kernel.evaluate(x) + epsilon * np.eye(n) + dist = stats.multivariate_normal(loc, cov) + y = dist.rvs() + z = np.random.normal(0, 1, y.shape) + # Construct a complete graph. + edges = [] + for i in range(1, n): + edges.append(np.transpose([np.roll(np.arange(i), 1), np.ones(i) * i])) + edges = np.concatenate(edges, axis=0).astype(int).T + + length_scale_type = "real" if p == 1 else "array [p_] real" + + # Evaluate the centered parametrization log likelihood. + if isinstance(kernel, kernels.ExpQuadKernel): + lpdf_stan_function = "gp_graph_exp_quad_cov_lpdf" + transform_stan_function = "gp_inv_graph_exp_quad_cov" + elif isinstance(kernel, kernels.MaternKernel): + if kernel.dof == 1.5: + lpdf_stan_function = "gp_graph_matern32_cov_lpdf" + transform_stan_function = "gp_inv_graph_matern32_cov" + elif kernel.dof == 2.5: + lpdf_stan_function = "gp_graph_matern52_cov_lpdf" + transform_stan_function = "gp_inv_graph_matern52_cov" + else: + raise ValueError(kernel.dof) else: - raise ValueError(kernel.dof) - else: - raise TypeError(kernel) - add_configuration({ - "stan_function": stan_function, - "arg_types": { - "p_": "int", "num_nodes_": "int", "num_edges_": "int", "y": "vector[num_nodes_]", - "x": "array [num_nodes_] vector[p_]", "sigma": "real", "length_scale": "real", - "edges": "array [2, num_edges_] int", "degrees": "array [num_nodes_] int", - "epsilon": "real", "loc": "vector[num_nodes_]", - }, - "arg_values": { - "p_": p, "num_nodes_": n, "num_edges_": edges.shape[1], "y": y, "loc": loc, "x": x, - "sigma": kernel.sigma, "length_scale": kernel.length_scale, "edges": edges + 1, - "degrees": np.bincount(edges[1], minlength=n), "epsilon": epsilon - }, - "result_type": "real", - "includes": ["gptools/util.stan", "gptools/graph.stan"], - "desired": dist.logpdf(y), - }) + raise TypeError(kernel) + + # Evaluate the log likelihood. + add_configuration({ + "stan_function": lpdf_stan_function, + "arg_types": { + "p_": "int", + "num_nodes_": "int", + "num_edges_": "int", + "y": "vector[num_nodes_]", + "x": "array [num_nodes_] vector[p_]", + "sigma": "real", + "length_scale": length_scale_type, + "edges": "array [2, num_edges_] int", + "degrees": "array [num_nodes_] int", + "epsilon": "real", + "loc": "vector[num_nodes_]", + }, + "arg_values": { + "p_": p, "num_nodes_": n, "num_edges_": edges.shape[1], "y": y, "loc": loc, "x": x, + "sigma": kernel.sigma, "length_scale": kernel.length_scale, "edges": edges + 1, + "degrees": np.bincount(edges[1], minlength=n), "epsilon": epsilon + }, + "result_type": "real", + "includes": ["gptools/util.stan", "gptools/graph.stan"], + "desired": dist.logpdf(y), + }) + + # Evaluate the non-centered transformation. + add_configuration({ + "stan_function": transform_stan_function, + "arg_types": { + "p_": "int", + "num_nodes_": "int", + "num_edges_": "int", + "z": "vector[num_nodes_]", + "x": "array [num_nodes_] vector[p_]", + "sigma": "real", + "length_scale": length_scale_type, + "edges": + "array [2, num_edges_] int", + "loc": "vector[num_nodes_]", + }, + "arg_values": { + "p_": p, + "num_nodes_": n, + "num_edges_": edges.shape[1], + "z": z, + "loc": loc, + "x": x, + "sigma": kernel.sigma, + "length_scale": kernel.length_scale, + "edges": edges + 1, + }, + "result_type": "vector[num_nodes_]", + "includes": ["gptools/util.stan", "gptools/graph.stan"], + "desired": None, # Only check we can execute, not the result. + }) @pytest.mark.parametrize("config", CONFIGURATIONS, ids=get_configuration_ids()) diff --git a/torch/gptools/torch/fft/fft1.py b/torch/gptools/torch/fft/fft1.py index 4fe22a2..6ff8966 100644 --- a/torch/gptools/torch/fft/fft1.py +++ b/torch/gptools/torch/fft/fft1.py @@ -1,4 +1,4 @@ -from gptools.util.fft import transform_rfft, transform_irfft, evaluate_rfft_log_abs_det_jacobian +from gptools.util.fft import transform_rfft, transform_irfft, evaluate_rfft_log_abs_det_jac from gptools.util.fft.fft1 import _get_rfft_scale import torch as th from torch.distributions import constraints @@ -44,7 +44,7 @@ def _inv_call(self, y: th.Tensor) -> th.Tensor: return transform_irfft(y, self.loc, rfft_scale=self.rfft_scale) def log_abs_det_jacobian(self, x: th.Tensor, y: th.Tensor) -> th.Tensor: - return evaluate_rfft_log_abs_det_jacobian(x.shape[-1], rfft_scale=self.rfft_scale) + return evaluate_rfft_log_abs_det_jac(x.shape[-1], rfft_scale=self.rfft_scale) class FourierGaussianProcess1D(th.distributions.TransformedDistribution): diff --git a/torch/gptools/torch/fft/fft2.py b/torch/gptools/torch/fft/fft2.py index 884d4ab..ee0e9d0 100644 --- a/torch/gptools/torch/fft/fft2.py +++ b/torch/gptools/torch/fft/fft2.py @@ -1,4 +1,4 @@ -from gptools.util.fft import transform_rfft2, transform_irfft2, evaluate_rfft2_log_abs_det_jacobian +from gptools.util.fft import transform_rfft2, transform_irfft2, evaluate_rfft2_log_abs_det_jac from gptools.util.fft.fft2 import _get_rfft2_scale import torch as th from .. import OptionalTensor @@ -45,7 +45,7 @@ def _inv_call(self, y: th.Tensor) -> th.Tensor: return transform_irfft2(y, self.loc, rfft2_scale=self.rfft2_scale) def log_abs_det_jacobian(self, x: th.Tensor, y: th.Tensor) -> th.Tensor: - return evaluate_rfft2_log_abs_det_jacobian(x.shape[-1], rfft2_scale=self.rfft2_scale) + return evaluate_rfft2_log_abs_det_jac(x.shape[-1], rfft2_scale=self.rfft2_scale) class FourierGaussianProcess2D(th.distributions.TransformedDistribution): diff --git a/util/gptools/util/fft/__init__.py b/util/gptools/util/fft/__init__.py index 8da5aac..bbd3fbe 100644 --- a/util/gptools/util/fft/__init__.py +++ b/util/gptools/util/fft/__init__.py @@ -1,6 +1,6 @@ -from .fft1 import evaluate_log_prob_rfft, evaluate_rfft_log_abs_det_jacobian, expand_rfft, \ +from .fft1 import evaluate_log_prob_rfft, evaluate_rfft_log_abs_det_jac, expand_rfft, \ transform_irfft, transform_rfft -from .fft2 import evaluate_log_prob_rfft2, evaluate_rfft2_log_abs_det_jacobian, transform_irfft2, \ +from .fft2 import evaluate_log_prob_rfft2, evaluate_rfft2_log_abs_det_jac, transform_irfft2, \ transform_rfft2 from .util import log_prob_stdnorm @@ -8,8 +8,8 @@ __all__ = [ "evaluate_log_prob_rfft", "evaluate_log_prob_rfft2", - "evaluate_rfft_log_abs_det_jacobian", - "evaluate_rfft2_log_abs_det_jacobian", + "evaluate_rfft_log_abs_det_jac", + "evaluate_rfft2_log_abs_det_jac", "expand_rfft", "log_prob_stdnorm", "transform_irfft", diff --git a/util/gptools/util/fft/fft1.py b/util/gptools/util/fft/fft1.py index 9372d93..40b8eb7 100644 --- a/util/gptools/util/fft/fft1.py +++ b/util/gptools/util/fft/fft1.py @@ -185,12 +185,12 @@ def evaluate_log_prob_rfft(y: ArrayOrTensor, loc: ArrayOrTensor, *, rfft_scale = _get_rfft_scale(cov_rfft, cov, rfft_scale, y.shape[-1]) rfft = transform_rfft(y, loc, rfft_scale=rfft_scale) return log_prob_stdnorm(rfft).sum(axis=-1) \ - + evaluate_rfft_log_abs_det_jacobian(y.shape[-1], rfft_scale=rfft_scale) + + evaluate_rfft_log_abs_det_jac(y.shape[-1], rfft_scale=rfft_scale) -def evaluate_rfft_log_abs_det_jacobian(size: int, *, cov_rfft: OptionalArrayOrTensor = None, - cov: OptionalArrayOrTensor = None, - rfft_scale: OptionalArrayOrTensor = None) -> ArrayOrTensor: +def evaluate_rfft_log_abs_det_jac(size: int, *, cov_rfft: OptionalArrayOrTensor = None, + cov: OptionalArrayOrTensor = None, + rfft_scale: OptionalArrayOrTensor = None) -> ArrayOrTensor: """ Evaluate the log absolute determinant of the Jacobian associated with :func:`transform_rfft`. @@ -202,7 +202,7 @@ def evaluate_rfft_log_abs_det_jacobian(size: int, *, cov_rfft: OptionalArrayOrTe :code:`(..., size // 2 + 1)`. Returns: - log_abs_det_jacobian: Log absolute determinant of the Jacobian. + log_abs_det_jac: Log absolute determinant of the Jacobian. """ imagidx = (size + 1) // 2 rfft_scale = _get_rfft_scale(cov_rfft, cov, rfft_scale, size) diff --git a/util/gptools/util/fft/fft2.py b/util/gptools/util/fft/fft2.py index fba7788..8d0d85b 100644 --- a/util/gptools/util/fft/fft2.py +++ b/util/gptools/util/fft/fft2.py @@ -178,9 +178,9 @@ def transform_rfft2(y: ArrayOrTensor, loc: ArrayOrTensor, *, return unpack_rfft2(dispatch[y].fft.rfft2(y - loc) / rfft2_scale, y.shape) -def evaluate_rfft2_log_abs_det_jacobian(width: int, *, cov_rfft2: OptionalArrayOrTensor = None, - cov: OptionalArrayOrTensor = None, - rfft2_scale: OptionalArrayOrTensor = None) -> ArrayOrTensor: +def evaluate_rfft2_log_abs_det_jac(width: int, *, cov_rfft2: OptionalArrayOrTensor = None, + cov: OptionalArrayOrTensor = None, + rfft2_scale: OptionalArrayOrTensor = None) -> ArrayOrTensor: """ Evaluate the log absolute determinant of the Jacobian associated with :func:`transform_rfft2`. @@ -194,7 +194,7 @@ def evaluate_rfft2_log_abs_det_jacobian(width: int, *, cov_rfft2: OptionalArrayO :code:`(..., height, width // 2 + 1)`. Returns: - log_abs_det_jacobian: Log absolute determinant of the Jacobian. + log_abs_det_jac: Log absolute determinant of the Jacobian. """ rfft2_scale = _get_rfft2_scale(cov_rfft2, cov, rfft2_scale, width) height = rfft2_scale.shape[-2] @@ -249,4 +249,4 @@ def evaluate_log_prob_rfft2(y: ArrayOrTensor, loc: ArrayOrTensor, *, rfft2_scale = _get_rfft2_scale(cov_rfft2, cov, rfft2_scale, y.shape[-1]) rfft2 = transform_rfft2(y, loc, rfft2_scale=rfft2_scale) return log_prob_stdnorm(rfft2).sum(axis=(-2, -1)) \ - + evaluate_rfft2_log_abs_det_jacobian(y.shape[-1], rfft2_scale=rfft2_scale) + + evaluate_rfft2_log_abs_det_jac(y.shape[-1], rfft2_scale=rfft2_scale)