diff --git a/src/eddymotion/model/gpr.py b/src/eddymotion/model/gpr.py
index 1d1c4da6..b6b9b5ba 100644
--- a/src/eddymotion/model/gpr.py
+++ b/src/eddymotion/model/gpr.py
@@ -64,7 +64,7 @@ class EddyMotionGPR(GaussianProcessRegressor):
     r"""
-    A GP regressor specialized for eddymotion.
+    A Gaussian process (GP) regressor specialized for eddymotion.

     This specialization of the default GP regressor is created to allow the
     following extended behaviors:

@@ -80,22 +80,21 @@ class EddyMotionGPR(GaussianProcessRegressor):
     In principle, Scikit-Learn's implementation normalizes the training data
     as in [Andersson15]_ (see
-    `FSL's souce code `__).
+    `FSL's source code `__).
     From their paper (p. 167, end of first column):

-        Typically one just substracts the mean (:math:`\bar{\mathbf{f}}`)
+        Typically one just subtracts the mean (:math:`\bar{\mathbf{f}}`)
         from :math:`\mathbf{f}` and then add it back to :math:`f^{*}`, which is
         analogous to what is often done in "traditional" regression.

     Finally, the parameter :math:`\sigma^2` maps on to Scikit-learn's ``alpha``
-    of the regressor.
-    Because it is not a parameter of the kernel, hyperparameter selection
-    through gradient-descent with analytical gradient calculations
-    would not work (the derivative of the kernel w.r.t. alpha is zero).
+    of the regressor. Because it is not a parameter of the kernel, hyperparameter
+    selection through gradient-descent with analytical gradient calculations
+    would not work (the derivative of the kernel w.r.t. ``alpha`` is zero).

-    I believe this is overlooked in [Andersson15]_, or they actually did not
-    use analytical gradient-descent:
+    This might have been overlooked in [Andersson15]_, or else they actually did
+    not use analytical gradient-descent:

         *A note on optimisation*
@@ -105,13 +104,12 @@ class EddyMotionGPR(GaussianProcessRegressor):
         The reason for that is that such methods typically use fewer steps, and
         when the cost of calculating the derivatives is small/moderate compared
         to calculating the functions itself (as is the case for Eq. (12)) then
-        execution time can be much shorter.
-        However, we found that for the multi-shell case a heuristic optimisation
-        method such as the Nelder-Mead simplex method (Nelder and Mead, 1965) was
-        frequently better at avoiding local maxima.
-        Hence, that was the method we used for all optimisations in the present
-        paper.
-
+        execution time can be much shorter. However, we found that for the
+        multi-shell case a heuristic optimisation method such as the Nelder-Mead
+        simplex method (Nelder and Mead, 1965) was frequently better at avoiding
+        local maxima. Hence, that was the method we used for all optimisations
+        in the present paper.
+
     **Multi-shell regression (TODO).**
     For multi-shell modeling, the kernel :math:`k(\textbf{x}, \textbf{x'})`
     is updated following Eq. (14) in [Andersson15]_.
@@ -272,7 +270,7 @@ def __init__(
         beta_l : :obj:`float`, optional
             The :math:`\lambda` hyperparameter.
         a_bounds : :obj:`tuple`, optional
-            Bounds for the a parameter.
+            Bounds for the ``a`` parameter.
         l_bounds : :obj:`tuple`, optional
             Bounds for the :math:`\lambda` hyperparameter.
@@ -309,10 +307,10 @@ def __call__(

         Returns
         -------
-        K : ndarray of shape (n_samples_X, n_samples_Y)
+        K : :obj:`~numpy.ndarray` of shape (n_samples_X, n_samples_Y)
             Kernel k(X, Y)

-        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
+        K_gradient : :obj:`~numpy.ndarray` of shape (n_samples_X, n_samples_X, n_dims),\
                 optional
             The gradient of the kernel k(X, X) with respect to the log of the
             hyperparameter of the kernel. Only returned when `eval_gradient`
@@ -340,12 +338,12 @@ def diag(self, X: np.ndarray) -> np.ndarray:

         Parameters
         ----------
-        X : ndarray of shape (n_samples_X, n_features)
+        X : :obj:`~numpy.ndarray` of shape (n_samples_X, n_features)
             Left argument of the returned kernel k(X, Y)

         Returns
         -------
-        K_diag : ndarray of shape (n_samples_X,)
+        K_diag : :obj:`~numpy.ndarray` of shape (n_samples_X,)
             Diagonal of kernel k(X, X)
         """
         return self.beta_l * np.ones(X.shape[0])
@@ -414,10 +412,10 @@ def __call__(

         Returns
         -------
-        K : ndarray of shape (n_samples_X, n_samples_Y)
+        K : :obj:`~numpy.ndarray` of shape (n_samples_X, n_samples_Y)
             Kernel k(X, Y)

-        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
+        K_gradient : :obj:`~numpy.ndarray` of shape (n_samples_X, n_samples_X, n_dims),\
                 optional
             The gradient of the kernel k(X, X) with respect to the log of the
             hyperparameter of the kernel. Only returned when ``eval_gradient``
@@ -450,12 +448,12 @@ def diag(self, X: np.ndarray) -> np.ndarray:

         Parameters
         ----------
-        X : ndarray of shape (n_samples_X, n_features)
+        X : :obj:`~numpy.ndarray` of shape (n_samples_X, n_features)
             Left argument of the returned kernel k(X, Y)

         Returns
         -------
-        K_diag : ndarray of shape (n_samples_X,)
+        K_diag : :obj:`~numpy.ndarray` of shape (n_samples_X,)
             Diagonal of kernel k(X, X)
         """
         return self.beta_l * np.ones(X.shape[0])
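Note on the ``alpha``/Nelder-Mead discussion in the docstring above (illustration only, not part of the patch): the sketch below reproduces the argument with stock Scikit-Learn and SciPy, without touching eddymotion's classes. The ``nelder_mead`` helper, the RBF kernel, the toy data, and the value ``alpha=0.5`` are placeholders invented for the example; only the optimizer-callable contract comes from Scikit-Learn's ``GaussianProcessRegressor``.

# Illustration only: a gradient-free (Nelder-Mead) hyperparameter search plugged
# into scikit-learn's GaussianProcessRegressor, mirroring the strategy quoted
# from Andersson et al. The kernel, data, and alpha value are placeholders,
# not eddymotion's defaults.
import numpy as np
from scipy.optimize import minimize
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF


def nelder_mead(obj_func, initial_theta, bounds):
    """Optimizer callable with the signature scikit-learn expects.

    ``obj_func(theta, eval_gradient=False)`` returns only the negative
    log-marginal likelihood, so no analytical gradient is ever evaluated.
    ``bounds`` is ignored here to keep the sketch short.
    """
    result = minimize(
        lambda theta: obj_func(theta, eval_gradient=False),
        initial_theta,
        method="Nelder-Mead",
    )
    return result.x, result.fun


rng = np.random.default_rng(1234)
X = rng.uniform(-1.0, 1.0, size=(40, 3))   # toy inputs standing in for gradient directions
y = np.cos(X @ np.array([1.0, 2.0, 0.5])) + 0.05 * rng.standard_normal(40)

gpr = GaussianProcessRegressor(
    kernel=RBF(length_scale=1.0),  # placeholder kernel with one tunable hyperparameter
    alpha=0.5,                     # sigma^2: added to the kernel diagonal, kept fixed
    optimizer=nelder_mead,
).fit(X, y)
print(gpr.kernel_)                 # hyperparameters selected by Nelder-Mead

Because ``alpha`` only enters the fit as a constant added to the kernel diagonal, the optimizer never sees it and it stays at whatever value was passed to the constructor; tuning it would require an outer loop (e.g., cross-validation), which is exactly the limitation the docstring points out.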