Commit

small bugs
RichieHakim committed Mar 30, 2024
1 parent cf04d21 commit 000e158
Showing 2 changed files with 30 additions and 12 deletions.
bnpm/decomposition.py (2 changes: 1 addition & 1 deletion)
@@ -395,7 +395,7 @@ def inverse_transform(
         X = self.prepare_input(X, center=False, zscale=False)
 
         if self.whiten:
-            scaled_components = torch.sqrt(self.explained_variance_) * self.components_
+            scaled_components = torch.sqrt(self.explained_variance_)[:, None] * self.components_
         else:
             scaled_components = self.components_
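Note on the fix above: explained_variance_ is a 1-D tensor of length n_components, while components_ has shape (n_components, n_features). Without the added axis, broadcasting aligns the variances against the trailing feature dimension, which raises an error when n_components != n_features and silently scales columns instead of component rows when the two happen to match. A minimal sketch of the shape behavior (standalone torch, not the bnpm API):

import torch

n_components, n_features = 3, 5
explained_variance = torch.tensor([4.0, 2.0, 1.0])  # shape: (3,)
components = torch.ones(n_components, n_features)   # shape: (3, 5)

# Broken: (3,) vs (3, 5) aligns on the last dim -> RuntimeError here,
# or silent column-wise scaling whenever n_components == n_features.
# bad = torch.sqrt(explained_variance) * components

# Fixed: (3, 1) broadcasts row-wise, scaling each component by its own std.
scaled = torch.sqrt(explained_variance)[:, None] * components
assert scaled.shape == (n_components, n_features)
assert torch.allclose(scaled[0], torch.full((n_features,), 2.0))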
bnpm/tests/test_PCA.py (40 changes: 29 additions & 11 deletions)
@@ -12,15 +12,33 @@
 
 def test_fit_transform_equivalence():
     n_components = 5
-    pca_sklearn = sklearnPCA(n_components=n_components, svd_solver='full').fit(X_np)
+    pca_sklearn = sklearnPCA(n_components=n_components).fit(X_np)
     pca_torch = PCA(n_components=n_components).fit(X_torch)
 
+    # Compare the principal components directly
+    assert np.allclose(pca_sklearn.components_, pca_torch.components_.numpy(), rtol=1e-2), "Principal components do not match within tolerance."
+
+    # Transform the data using both PCA implementations
     X_transformed_sklearn = pca_sklearn.transform(X_np)
     X_transformed_torch = pca_torch.transform(X_torch).numpy()
 
+    # Test for equivalence of the transformed data with adjusted tolerances
+    max_diff = np.abs(X_transformed_sklearn - X_transformed_torch).max()
+    assert np.allclose(X_transformed_sklearn, X_transformed_torch, atol=1e-3), f"Transformed data does not match within tolerance. Maximum difference: {max_diff}"
+
+def test_fitTransform_vs_fit_then_transform():
+    n_components = 5
+    pca = PCA(n_components=n_components)
+
+    # Fit and transform in a single step
+    X_transformed_fitTransform = pca.fit_transform(X_torch).numpy()
+
+    # Fit and transform in two steps
+    pca.fit(X_torch)
+    X_transformed_fit_then_transform = pca.transform(X_torch).numpy()
 
     # Test for equivalence of the transformed data
-    assert np.allclose(X_transformed_sklearn, X_transformed_torch, atol=1e-5)
+    assert np.allclose(X_transformed_fitTransform, X_transformed_fit_then_transform, atol=1e-3), "Transformed data does not match when fit and transform are done separately."
 
 def test_explained_variance_ratio():
     pca_torch = PCA(n_components=5)
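The loosened tolerances in the hunk above (atol 1e-5 -> 1e-3 on transformed data, rtol=1e-2 on components) are consistent with the precision gap between backends: sklearn computes in float64, while torch tensors typically default to float32, where round-off alone can exceed 1e-5. A standalone illustration of the dtype gap (assumed dtypes, not bnpm code):

import numpy as np
import torch

rng = np.random.default_rng(0)
A = rng.normal(size=(200, 20))

# The same product in float64 (numpy default) vs float32 (torch default).
ref = A @ A.T
approx = (torch.tensor(A, dtype=torch.float32) @ torch.tensor(A, dtype=torch.float32).T).double().numpy()
print(np.abs(ref - approx).max())  # float32 round-off; often near or above 1e-5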
@@ -32,14 +50,15 @@ def test_explained_variance_ratio():
     assert np.allclose(pca_torch.explained_variance_ratio_.numpy(), pca_sklearn.explained_variance_ratio_, atol=1e-5)
 
 def test_inverse_transform():
-    pca_torch = PCA(n_components=5)
+    pca_torch = PCA(n_components=None)
     pca_torch.fit(X_torch)
 
     X_transformed_torch = pca_torch.transform(X_torch)
     X_inversed_torch = pca_torch.inverse_transform(X_transformed_torch).numpy()
 
     # Test for approximation of original data after inverse transformation
-    assert np.allclose(X_np, X_inversed_torch, atol=1e-5)
+    max_diff = np.abs(X_np - X_inversed_torch).max()
+    assert np.allclose(X_np, X_inversed_torch, atol=1e-3), f"Inverse transformed data does not match original data within tolerance. Maximum difference: {max_diff}"
 
 def test_components_sign():
     pca_torch = PCA(n_components=2)
@@ -68,7 +87,7 @@ def test_whitening_effect():
     X_transformed = pca_whiten.transform(X_torch).numpy()
     # Check if the variance across each principal component is close to 1, which is expected after whitening
     variances = np.var(X_transformed, axis=0)
-    assert np.allclose(variances, np.ones(variances.shape), atol=1e-5), "Whitened components do not have unit variance."
+    assert np.allclose(variances, np.ones(variances.shape), atol=1e-1), "Whitened components do not have unit variance."
 
 def test_retain_all_components():
     pca_all = PCA(n_components=None) # Retain all components
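The relaxed atol=1e-1 in test_whitening_effect plausibly reflects a degrees-of-freedom mismatch rather than a real failure: when explained_variance_ uses the unbiased 1/(n-1) normalization (as sklearn does), whitened scores have unit variance under ddof=1, but np.var defaults to ddof=0 and returns (n-1)/n instead of exactly 1. A sketch of the arithmetic using sklearn, assuming bnpm follows the same normalization convention:

import numpy as np
from sklearn.decomposition import PCA as sklearnPCA

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 8))
X_whitened = sklearnPCA(n_components=5, whiten=True).fit_transform(X)

print(np.var(X_whitened, axis=0, ddof=1))  # exactly 1 by construction
print(np.var(X_whitened, axis=0, ddof=0))  # (n-1)/n = 0.99: fails atol=1e-5, passes 1e-1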
@@ -98,13 +117,12 @@ def test_data_preparation():

     # sklearn doesn't directly expose mean_ and std_ for centered and scaled data,
     # so we compare against manually calculated values.
-    X_centered_scaled = (X_np - X_np.mean(axis=0)) / X_np.std(axis=0)
-    mean_diff = np.abs(X_centered_scaled.mean(axis=0) - pca_center_scale.mean_.numpy())
-    std_diff = np.abs(X_centered_scaled.std(axis=0) - pca_center_scale.std_.numpy())
-
-    assert np.all(mean_diff < 1e-5), "Data centering (mean subtraction) is incorrect."
-    assert np.all(std_diff < 1e-5), "Data scaling (division by std) is incorrect."
+    X_mean = X_torch.mean(dim=0).numpy()
+    X_std = X_torch.std(dim=0).numpy()
+
+    assert np.allclose(pca_center_scale.mean_, X_mean, atol=1e-5), "Centered data mean does not match."
+    assert np.allclose(pca_center_scale.std_, X_std, atol=1e-5), "Scaled data standard deviation does not match."
 
 def test_singular_values_and_vectors():
     pca_svd = PCA(n_components=5)
     pca_svd.fit(X_torch)
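The rewrite of test_data_preparation sidesteps another convention mismatch: np.std defaults to the biased ddof=0 estimator, while torch.std defaults to the unbiased ddof=1 estimator, so checking a torch-computed std_ against X_np.std(axis=0) can fail at tight tolerances even when the code is correct. Comparing against X_torch.std(dim=0) keeps both sides on the same convention. A quick standalone demonstration:

import numpy as np
import torch

x_np = np.arange(10, dtype=np.float64)
x_torch = torch.arange(10, dtype=torch.float64)

print(x_np.std())            # 2.8723... (ddof=0, biased)
print(x_torch.std().item())  # 3.0276... (ddof=1, unbiased by default)
print(x_np.std(ddof=1))      # 3.0276... matches torch once ddof agrees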
