diff --git a/src/eddymotion/model/gpr.py b/src/eddymotion/model/gpr.py
index 1d1c4da6..b6b9b5ba 100644
--- a/src/eddymotion/model/gpr.py
+++ b/src/eddymotion/model/gpr.py
@@ -64,7 +64,7 @@ class EddyMotionGPR(GaussianProcessRegressor):
     r"""
-    A GP regressor specialized for eddymotion.
+    A Gaussian process (GP) regressor specialized for eddymotion.

     This specialization of the default GP regressor is created to allow the
     following extended behaviors:

@@ -80,22 +80,21 @@ class EddyMotionGPR(GaussianProcessRegressor):
     In principle, Scikit-Learn's implementation normalizes the training data
     as in [Andersson15]_ (see
-    `FSL's souce code `__).
+    `FSL's source code `__).
     From their paper (p. 167, end of first column):

-        Typically one just substracts the mean (:math:`\bar{\mathbf{f}}`)
+        Typically one just subtracts the mean (:math:`\bar{\mathbf{f}}`)
         from :math:`\mathbf{f}` and then add it back to :math:`f^{*}`, which is
         analogous to what is often done in "traditional" regression.

     Finally, the parameter :math:`\sigma^2` maps on to Scikit-learn's ``alpha``
-    of the regressor.
-    Because it is not a parameter of the kernel, hyperparameter selection
-    through gradient-descent with analytical gradient calculations
-    would not work (the derivative of the kernel w.r.t. alpha is zero).
+    of the regressor. Because it is not a parameter of the kernel, hyperparameter
+    selection through gradient-descent with analytical gradient calculations
+    would not work (the derivative of the kernel w.r.t. ``alpha`` is zero).

-    I believe this is overlooked in [Andersson15]_, or they actually did not
-    use analytical gradient-descent:
+    This might have been overlooked in [Andersson15]_, or else they actually did
+    not use analytical gradient-descent:

         *A note on optimisation*
@@ -105,13 +104,12 @@ class EddyMotionGPR(GaussianProcessRegressor):
         The reason for that is that such methods typically use fewer steps, and
         when the cost of calculating the derivatives is small/moderate compared
         to calculating the functions itself (as is the case for Eq. (12)) then
-        execution time can be much shorter.
-        However, we found that for the multi-shell case a heuristic optimisation
-        method such as the Nelder-Mead simplex method (Nelder and Mead, 1965) was
-        frequently better at avoiding local maxima.
-        Hence, that was the method we used for all optimisations in the present
-        paper.
-
+        execution time can be much shorter. However, we found that for the
+        multi-shell case a heuristic optimisation method such as the Nelder-Mead
+        simplex method (Nelder and Mead, 1965) was frequently better at avoiding
+        local maxima. Hence, that was the method we used for all optimisations
+        in the present paper.
+
     **Multi-shell regression (TODO).**
     For multi-shell modeling, the kernel :math:`k(\textbf{x}, \textbf{x'})`
     is updated following Eq. (14) in [Andersson15]_.
@@ -272,7 +270,7 @@ def __init__(
         beta_l : :obj:`float`, optional
             The :math:`\lambda` hyperparameter.
         a_bounds : :obj:`tuple`, optional
-            Bounds for the a parameter.
+            Bounds for the ``a`` parameter.
         l_bounds : :obj:`tuple`, optional
             Bounds for the :math:`\lambda` hyperparameter.
@@ -309,10 +307,10 @@ def __call__(

         Returns
         -------
-        K : ndarray of shape (n_samples_X, n_samples_Y)
+        K : :obj:`~numpy.ndarray` of shape (n_samples_X, n_samples_Y)
             Kernel k(X, Y)

-        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
+        K_gradient : :obj:`~numpy.ndarray` of shape (n_samples_X, n_samples_X, n_dims),\
                 optional
             The gradient of the kernel k(X, X) with respect to the log of the
             hyperparameter of the kernel. Only returned when `eval_gradient`
@@ -340,12 +338,12 @@ def diag(self, X: np.ndarray) -> np.ndarray:

         Parameters
         ----------
-        X : ndarray of shape (n_samples_X, n_features)
+        X : :obj:`~numpy.ndarray` of shape (n_samples_X, n_features)
             Left argument of the returned kernel k(X, Y)

         Returns
         -------
-        K_diag : ndarray of shape (n_samples_X,)
+        K_diag : :obj:`~numpy.ndarray` of shape (n_samples_X,)
             Diagonal of kernel k(X, X)
         """
         return self.beta_l * np.ones(X.shape[0])
@@ -414,10 +412,10 @@ def __call__(

         Returns
         -------
-        K : ndarray of shape (n_samples_X, n_samples_Y)
+        K : :obj:`~numpy.ndarray` of shape (n_samples_X, n_samples_Y)
             Kernel k(X, Y)

-        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
+        K_gradient : :obj:`~numpy.ndarray` of shape (n_samples_X, n_samples_X, n_dims),\
                 optional
             The gradient of the kernel k(X, X) with respect to the log of the
             hyperparameter of the kernel. Only returned when ``eval_gradient``
@@ -450,12 +448,12 @@ def diag(self, X: np.ndarray) -> np.ndarray:

         Parameters
         ----------
-        X : ndarray of shape (n_samples_X, n_features)
+        X : :obj:`~numpy.ndarray` of shape (n_samples_X, n_features)
             Left argument of the returned kernel k(X, Y)

         Returns
         -------
-        K_diag : ndarray of shape (n_samples_X,)
+        K_diag : :obj:`~numpy.ndarray` of shape (n_samples_X,)
             Diagonal of kernel k(X, X)
         """
         return self.beta_l * np.ones(X.shape[0])
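Note on the ``alpha``/Nelder-Mead discussion in the docstring above (illustration only, not part of the patch): the sketch below reproduces the argument with stock Scikit-Learn and SciPy, without touching eddymotion's classes. The ``nelder_mead`` helper, the RBF kernel, the toy data, and the value ``alpha=0.5`` are placeholders invented for the example; only the optimizer-callable contract comes from Scikit-Learn's ``GaussianProcessRegressor``.

# Illustration only: a gradient-free (Nelder-Mead) hyperparameter search plugged
# into scikit-learn's GaussianProcessRegressor, mirroring the strategy quoted
# from Andersson et al. The kernel, data, and alpha value are placeholders,
# not eddymotion's defaults.
import numpy as np
from scipy.optimize import minimize
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF


def nelder_mead(obj_func, initial_theta, bounds):
    """Optimizer callable with the signature scikit-learn expects.

    ``obj_func(theta, eval_gradient=False)`` returns only the negative
    log-marginal likelihood, so no analytical gradient is ever evaluated.
    ``bounds`` is ignored here to keep the sketch short.
    """
    result = minimize(
        lambda theta: obj_func(theta, eval_gradient=False),
        initial_theta,
        method="Nelder-Mead",
    )
    return result.x, result.fun


rng = np.random.default_rng(1234)
X = rng.uniform(-1.0, 1.0, size=(40, 3))   # toy inputs standing in for gradient directions
y = np.cos(X @ np.array([1.0, 2.0, 0.5])) + 0.05 * rng.standard_normal(40)

gpr = GaussianProcessRegressor(
    kernel=RBF(length_scale=1.0),  # placeholder kernel with one tunable hyperparameter
    alpha=0.5,                     # sigma^2: added to the kernel diagonal, kept fixed
    optimizer=nelder_mead,
).fit(X, y)
print(gpr.kernel_)                 # hyperparameters selected by Nelder-Mead

Because ``alpha`` only enters the fit as a constant added to the kernel diagonal, the optimizer never sees it and it stays at whatever value was passed to the constructor; tuning it would require an outer loop (e.g., cross-validation), which is exactly the limitation the docstring points out.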