From 16fa88ec8c44f77327948cf323efe7e5a33f6714 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= <pavlin.g.p@gmail.com>
Date: Fri, 7 Jun 2024 10:49:12 +0200
Subject: [PATCH 1/2] pca: fix failing tests due to svd_flip mismatch between
 sklearn 1.4/1.5

---
 Orange/widgets/unsupervised/tests/test_owpca.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/Orange/widgets/unsupervised/tests/test_owpca.py b/Orange/widgets/unsupervised/tests/test_owpca.py
index 6d7d5debee3..25e8c81b182 100644
--- a/Orange/widgets/unsupervised/tests/test_owpca.py
+++ b/Orange/widgets/unsupervised/tests/test_owpca.py
@@ -223,10 +223,17 @@ def test_normalized_gives_correct_result(self, prepare_table):
         x = (x - x.mean(0)) / x.std(0)
         U, S, Va = np.linalg.svd(x)
         U, S, Va = U[:, :2], S[:2], Va[:2]
-        U, Va = svd_flip(U, Va, u_based_decision=False)
-        pca_embedding = U * S
+        x_pca = U * S
 
-        np.testing.assert_almost_equal(widget_result.X, pca_embedding)
+        # In scikit-learn==1.4.0, the svd_flip function requires a `V` matrix;
+        # therefore, we provide a dummy matrix of the correct size so we can
+        # call the function. In scikit-learn==1.5.0, we can remove this, since
+        # V can be None if we are passing `u_based_decision=True`.
+        dummy_v = np.zeros_like(x_pca.T)
+        x_pca, _ = svd_flip(x_pca, dummy_v, u_based_decision=True)
+        x_widget, _ = svd_flip(widget_result.X.copy(), dummy_v, u_based_decision=True)
+
+        np.testing.assert_almost_equal(x_widget, x_pca)
 
     def test_do_not_mask_features(self):
         # the widget used to replace cached variables when creating the

From b9625daa62df7e20ff103df42d87ba72af4b6e9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= <pavlin.g.p@gmail.com>
Date: Fri, 7 Jun 2024 10:56:52 +0200
Subject: [PATCH 2/2] pca: override svd_solver to arpack when data is sparse
 and solver is set to auto

---
 Orange/projection/pca.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Orange/projection/pca.py b/Orange/projection/pca.py
index d2b13d7e38a..a9ecb1cea94 100644
--- a/Orange/projection/pca.py
+++ b/Orange/projection/pca.py
@@ -48,6 +48,13 @@ def fit(self, X, Y=None):
         if sp.issparse(X) and params["n_components"] == min(X.shape):
             X = X.toarray()
 
+        # In scikit-learn==1.4.0, only the arpack solver is supported for sparse
+        # data, and `svd_solver="auto"` doesn't auto-resolve to it. This is
+        # fixed in scikit-learn 1.5.0, but for the time being, override the
+        # solver setting here.
+        if sp.issparse(X) and params["svd_solver"] == "auto":
+            params["svd_solver"] = "arpack"
+
         proj = self.__wraps__(**params)
         proj = proj.fit(X, Y)
         return PCAModel(proj, self.domain, len(proj.components_))