Skip to content

Commit

Permalink
added pca robustness test by shuffling rows and columns of matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
ntalluri committed Sep 10, 2024
1 parent 1957465 commit 6ae2666
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions test/ml/test_ml.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import filecmp
import random
from pathlib import Path

import pandas as pd
Expand Down Expand Up @@ -61,6 +62,34 @@ def test_pca(self):

assert coord.equals(expected)

def test_pca_robustness(self):
    """Check that the PCA coordinates are unaffected by row or column order.

    The summarized dataframe is permuted five times along its columns and
    then five times along its rows.  After every permutation ``ml.pca`` is
    run and the coordinates file it writes is compared against the same
    expected coordinates file used for the unshuffled input.

    Each permutation uses a fixed ``random_state`` so that a failure can be
    reproduced exactly on a rerun instead of depending on the global RNG.
    """
    dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt'])

    # The expected coordinates do not change between iterations, so load and
    # round them once instead of re-reading the file for every permutation.
    expected = pd.read_table(EXPECT_DIR + 'expected-pca-coordinates.tsv')
    expected = expected.round(5)

    # (axis passed to DataFrame.sample, output file prefix)
    shuffles = (
        (1, 'pca-shuffled-columns'),  # permute the columns
        (0, 'pca-shuffled-rows'),  # permute the rows
    )

    for axis, prefix in shuffles:
        coordinates_file = OUT_DIR + prefix + '-coordinates.tsv'
        for seed in range(5):
            # frac=1 samples every label exactly once, i.e. a full permutation
            dataframe_shuffled = dataframe.sample(frac=1, axis=axis, random_state=seed)
            ml.pca(dataframe_shuffled, OUT_DIR + prefix + '.png', OUT_DIR + prefix + '-variance.txt',
                   coordinates_file)
            coord = pd.read_table(coordinates_file)
            coord = coord.round(5)  # round values to 5 digits to account for numeric differences across machines
            # Put the rows into a canonical order before comparing with the expected file
            coord.sort_values(by='algorithm', ignore_index=True, inplace=True)

            assert coord.equals(expected), \
                'PCA coordinates changed after shuffling axis ' + str(axis) + ' with random_state ' + str(seed)


def test_hac_horizontal(self):
dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt'])
ml.hac_horizontal(dataframe, OUT_DIR + 'hac-horizontal.png', OUT_DIR + 'hac-clusters-horizontal.txt')
Expand Down

0 comments on commit 6ae2666

Please sign in to comment.