BUG: Fix Gaussian process

Fix Gaussian process: - Use a default (`test`) kernel to instantiate the Gaussian process in the test, as the spherical kernel is not yet added. - Use a regression problem to obtain 3 features (instead of 4 from the Friedman2 problem) in the test. - Normalize the gradient vectors in thest test. - Swap the signal and the gradient table when fitting the GP in the test to conform to the expected parameters. - Use the appropriate indexing and shape for the query vectors in the test. - Use named parameters when instantiating `GPFit` to avoid providing the gradient table twice. - Get the first element in the `data` vector (whose dimensionality is changed by the `GaussianProcessModel` fitting method when checking for a mask) to enable fitting. - The `model` variable in `gp_prediction` is a scikit-learn `GaussianProcessRegressor` instance do it does not have an `_gpr` attribute.
nipreps · Jul 4, 2024 · 05670e6 · 05670e6
1 parent 0c32def
commit 05670e6
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 11 deletions.
diff --git a/src/eddymotion/model/dipy.py b/src/eddymotion/model/dipy.py
@@ -64,11 +64,11 @@ def gp_prediction(
     # Check it's fitted as they do in sklearn internally
     # https://github.com/scikit-learn/scikit-learn/blob/972e17fe1aa12d481b120ad4a3dc076bae736931/\
     # sklearn/gaussian_process/_gpr.py#L410C9-L410C42
-    if not hasattr(model._gpr, "X_train_"):
+    if not hasattr(model, "X_train_"):
         raise RuntimeError("Model is not yet fitted.")
 
     # Extract orientations from gtab, and highly likely, the b-value too.
-    return model._gpr.predict(gtab, return_std=False)
+    return model.predict(gtab, return_std=False)
 
 
 def get_kernel(
@@ -175,18 +175,20 @@ def fit(
             data[mask[..., None]] if mask is not None else np.reshape(data, (-1, data.shape[-1]))
         )
 
-        if data.shape[-1] != len(gtab):
+        signal_dirs = data.shape[-1]
+        grad_dirs = gtab.gradients.shape[0]
+        if signal_dirs != grad_dirs:
             raise ValueError(
-                f"Mismatched data {data.shape[-1]} and gradient table {len(gtab)} sizes."
+                f"Mismatched data {signal_dirs} and gradient table {grad_dirs} sizes."
             )
 
         gpr = GaussianProcessRegressor(
             kernel=get_kernel(self.kernel_model, gtab=gtab),
             random_state=random_state,
         )
         self._modelfit = GPFit(
-            gpr.fit(gtab.gradients, data),
             gtab=gtab,
+            model=gpr.fit(gtab.gradients, data[0]),
             mask=mask,
         )
         return self._modelfit

diff --git a/test/test_model.py b/test/test_model.py
@@ -24,7 +24,8 @@
 
 import numpy as np
 import pytest
-from sklearn.datasets import make_friedman2
+from dipy.core.gradients import gradient_table
+from sklearn.datasets import make_regression
 
 from eddymotion import model
 from eddymotion.data.dmri import DWI
@@ -108,14 +109,17 @@ def test_average_model():
 
 
 def test_gp_model():
-    gp = GaussianProcessModel(kernel="default")
+    gp = GaussianProcessModel("test", kernel="default")
 
     assert isinstance(gp, model.dipy.GaussianProcessModel)
 
-    X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
-    gp.fit(X, y)
-    X_qry = X[:2, :]
-    prediction, _ = gp.predict(X_qry, return_std=True)
+    X, y = make_regression(n_samples=100, n_features=3, noise=0, random_state=0)
+
+    bvecs = X.T / np.linalg.norm(X.T, axis=0)
+    gtab = gradient_table([1000] * bvecs.shape[-1], bvecs)
+    gp.fit(y, gtab)
+    X_qry = bvecs[:, :2].T
+    prediction = gp.predict(X_qry, return_std=True)
 
     assert prediction.shape == (X_qry.shape[0],)