From b23a0ba307098f93481547e88e46652eb8f0c8b4 Mon Sep 17 00:00:00 2001 From: Marc Duby Date: Wed, 4 Sep 2024 14:14:45 -0400 Subject: [PATCH] gene_nmf: fixed gene set p_value compute; more sets returned, no change to order --- app/novelty/gene_nmf/dcc/compute_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/app/novelty/gene_nmf/dcc/compute_utils.py b/app/novelty/gene_nmf/dcc/compute_utils.py index e9eddff..25ea630 100644 --- a/app/novelty/gene_nmf/dcc/compute_utils.py +++ b/app/novelty/gene_nmf/dcc/compute_utils.py @@ -213,6 +213,9 @@ def compute_beta_tildes(X, Y, scale_factors, mean_shifts, y_var=1, resid_correla len_Y = Y.shape[0] Y = Y - np.mean(Y) + # BUG - fixed y_var wrong setting + y_var = np.var(Y, axis=1) + dot_product = np.array(X.T.dot(Y.T) / len_Y).T variances = np.power(scale_factors, 2) @@ -706,11 +709,15 @@ def filter_matrix_columns(matrix_input, vector_input, cutoff_input, max_num_gene if len(selected_column_indices) > max_num_gene_sets: # log if log: - logger.info("filtered gene sets of size: {} is larger than the max: {}, so taking top {}".format(len(selected_column_indices), max_num_gene_sets, max_num_gene_sets)) + logger.info("filtered gene sets of size: {} is LARGER than the max: {}, so taking top {}".format(len(selected_column_indices), max_num_gene_sets, max_num_gene_sets)) # Get the indices of the n lowest values min_values = np.min(vector_input, axis=0) selected_column_indices = np.argsort(min_values)[:max_num_gene_sets] + else: + if log: + logger.info("filtered gene sets of size: {} is SMALLER than the max: {}, so keep the result as is".format(len(selected_column_indices), max_num_gene_sets)) + # filter the reference gene/gene sets matrix down matrix_result = matrix_input[:, selected_column_indices]