Skip to content

Commit

Permalink
Merge pull request biolab#6821 from pavlin-policar/pca-fix-2
Browse files Browse the repository at this point in the history
[FIX] PCA: ensure tests pass on sklearn 1.4 and 1.5, which can return different results
  • Loading branch information
markotoplak authored Jun 7, 2024
2 parents 8831a57 + b9625da commit 55905a4
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
7 changes: 7 additions & 0 deletions Orange/projection/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ def fit(self, X, Y=None):
if sp.issparse(X) and params["n_components"] == min(X.shape):
X = X.toarray()

# In scikit-learn==1.4.0, only the arpack solver is supported for sparse
# data, but `svd_solver="auto"` doesn't auto-resolve to it. This is fixed
# in scikit-learn 1.5.0, but for the time being, override the setting
# here.
if sp.issparse(X) and params["svd_solver"] == "auto":
params["svd_solver"] = "arpack"

proj = self.__wraps__(**params)
proj = proj.fit(X, Y)
return PCAModel(proj, self.domain, len(proj.components_))
Expand Down
13 changes: 10 additions & 3 deletions Orange/widgets/unsupervised/tests/test_owpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,10 +223,17 @@ def test_normalized_gives_correct_result(self, prepare_table):
x = (x - x.mean(0)) / x.std(0)
U, S, Va = np.linalg.svd(x)
U, S, Va = U[:, :2], S[:2], Va[:2]
U, Va = svd_flip(U, Va, u_based_decision=False)
pca_embedding = U * S
x_pca = U * S

np.testing.assert_almost_equal(widget_result.X, pca_embedding)
# In scikit-learn==1.4.0, the svd_flip function requires a `V` matrix, so
# we provide a dummy matrix of the correct size in order to call the
# function. In scikit-learn==1.5.0, this can be removed, since `V` can be
# None when passing `u_based_decision=True`.
dummy_v = np.zeros_like(x_pca.T)
x_pca, _ = svd_flip(x_pca, dummy_v, u_based_decision=True)
x_widget, _ = svd_flip(widget_result.X.copy(), dummy_v, u_based_decision=True)

np.testing.assert_almost_equal(x_widget, x_pca)

def test_do_not_mask_features(self):
# the widget used to replace cached variables when creating the
Expand Down

0 comments on commit 55905a4

Please sign in to comment.