diff --git a/bnpm/cupy_helpers.py b/bnpm/cupy_helpers.py index 35b13bd..fdb8832 100644 --- a/bnpm/cupy_helpers.py +++ b/bnpm/cupy_helpers.py @@ -1,8 +1,3 @@ -def import_cupy_pkgs(): - import cuml - import cuml.decomposition - import cupy - def set_device(device_num=0, verbose=True): """ Set the device to use. @@ -15,6 +10,7 @@ def set_device(device_num=0, verbose=True): verbose (bool): Whether to print the device name. """ + import cupy if cupy.cuda.runtime.getDeviceCount() > 0: if device_num is None: diff --git a/bnpm/decomposition.py b/bnpm/decomposition.py index 18c6c1e..1cba145 100644 --- a/bnpm/decomposition.py +++ b/bnpm/decomposition.py @@ -23,85 +23,6 @@ ########## PCA ############ ########################### -def simple_pca(X , n_components=None , mean_sub=True, zscore=False, plot_pref=False , n_PCs_toPlot=2): - """ - Performs PCA on X. - RH 2021 - - Args: - X (np.ndarray): - Data to be decomposed. - 2-D array. Columns are features, rows are samples. - n_components (int): - Number of components to keep. If None, then - n_components = X.shape[1] - mean_sub (bool): - Whether or not to mean subtract ('center') the - columns. - zscore (bool): - Whether or not to z-score the columns. This is - equivalent to doing PCA on the correlation-matrix. - plot_pref (bool): - Whether or not to plot the first n_PCs_toPlot of the - PCA. - n_PCs_toPlot (int): - Number of PCs to plot. - - Returns: - components (np.ndarray): - The components of the decomposition. - 2-D array. - Each column is a component vector. Each row is a - feature weight. - scores (np.ndarray): - The scores of the decomposition. - 2-D array. - Each column is a score vector. Each row is a - sample weight. - EVR (np.ndarray): - The explained variance ratio of each component. - 1-D array. - Each element is the explained variance ratio of - the corresponding component. - """ - if mean_sub and not zscore: - X = X - np.mean(X, axis=0) - if zscore: - # X = scipy.stats.zscore(X, axis=0) - X = X - np.mean(X, axis=0) - stds = np.std(X, axis=0) - X = X / stds[None,:] - - if n_components is None: - n_components = X.shape[1] - decomp = sklearn.decomposition.PCA(n_components=n_components) - decomp.fit_transform(X) - components = decomp.components_ - scores = decomp.transform(X) - - if plot_pref: - fig , axs = plt.subplots(4 , figsize=(7,15)) - axs[0].plot(np.arange(n_components)+1, - decomp.explained_variance_ratio_) - axs[0].set_xscale('log') - axs[0].set_xlabel('component #') - axs[0].set_ylabel('explained variance ratio') - - axs[1].plot(np.arange(n_components)+1, - np.cumsum(decomp.explained_variance_ratio_)) - axs[1].set_xscale('log') - axs[1].set_ylabel('cumulative explained variance ratio') - - axs[2].plot(scores[:,:n_PCs_toPlot]) - axs[2].set_xlabel('sample num') - axs[2].set_ylabel('a.u.') - - axs[3].plot(components.T[:,:n_PCs_toPlot]) - axs[3].set_xlabel('feature num') - axs[3].set_ylabel('score') - - return components , scores , decomp.explained_variance_ratio_ - def svd_flip( u: torch.Tensor, diff --git a/bnpm/h5_handling.py b/bnpm/h5_handling.py index 6bd53ec..36ffdd0 100644 --- a/bnpm/h5_handling.py +++ b/bnpm/h5_handling.py @@ -2,6 +2,7 @@ from pathlib import Path import h5py +import numpy as np def close_all_h5(): '''