From 16fa88ec8c44f77327948cf323efe7e5a33f6714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Fri, 7 Jun 2024 10:49:12 +0200 Subject: [PATCH 1/2] pca: fix failing tests due to svd_flip mismatch between sklearn 1.4/1.5 --- Orange/widgets/unsupervised/tests/test_owpca.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Orange/widgets/unsupervised/tests/test_owpca.py b/Orange/widgets/unsupervised/tests/test_owpca.py index 6d7d5debee3..25e8c81b182 100644 --- a/Orange/widgets/unsupervised/tests/test_owpca.py +++ b/Orange/widgets/unsupervised/tests/test_owpca.py @@ -223,10 +223,17 @@ def test_normalized_gives_correct_result(self, prepare_table): x = (x - x.mean(0)) / x.std(0) U, S, Va = np.linalg.svd(x) U, S, Va = U[:, :2], S[:2], Va[:2] - U, Va = svd_flip(U, Va, u_based_decision=False) - pca_embedding = U * S + x_pca = U * S - np.testing.assert_almost_equal(widget_result.X, pca_embedding) + # In scikit-learn==1.4.0, the svd_flip function requires a `V` matrix; + # therefore, we provide a dummy matrix of the correct size so we can + # call the function. In scikit-learn==1.5.0, we can remove this since + # `V` can be None if we are passing `u_based_decision=True`. 
+ dummy_v = np.zeros_like(x_pca.T) + x_pca, _ = svd_flip(x_pca, dummy_v, u_based_decision=True) + x_widget, _ = svd_flip(widget_result.X.copy(), dummy_v, u_based_decision=True) + + np.testing.assert_almost_equal(x_widget, x_pca) def test_do_not_mask_features(self): # the widget used to replace cached variables when creating the From b9625daa62df7e20ff103df42d87ba72af4b6e9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Fri, 7 Jun 2024 10:56:52 +0200 Subject: [PATCH 2/2] pca: override svd_solver to arpack when data is sparse and solver is set to auto --- Orange/projection/pca.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Orange/projection/pca.py b/Orange/projection/pca.py index d2b13d7e38a..a9ecb1cea94 100644 --- a/Orange/projection/pca.py +++ b/Orange/projection/pca.py @@ -48,6 +48,13 @@ def fit(self, X, Y=None): if sp.issparse(X) and params["n_components"] == min(X.shape): X = X.toarray() + # In scikit-learn==1.4.0, only the arpack solver is supported for sparse + # data, and `svd_solver="auto"` doesn't auto-resolve to it. This is + # fixed in scikit-learn 1.5.0, but for the time being, override the + # setting here. + if sp.issparse(X) and params["svd_solver"] == "auto": + params["svd_solver"] = "arpack" + proj = self.__wraps__(**params) proj = proj.fit(X, Y) return PCAModel(proj, self.domain, len(proj.components_))