NA works ? to be tested

sistm · Oct 24, 2023 · 60752dc · 60752dc
1 parent 0dad5df
commit 60752dc
Showing 1 changed file with 95 additions and 94 deletions.
diff --git a/R/cit_gsa.R b/R/cit_gsa.R
@@ -79,18 +79,18 @@
 #' @examples
 #' #TO DO 
 cit_gsa <- function(M,
-                      X,
-                      Z = NULL,
-                      test = c("asymptotic","permutation"),
-                      n_perm = 100,
-                      n_perm_adaptive = c(n_perm, n_perm, n_perm*3, n_perm*5),
-                      thresholds = c(0.1,0.05,0.01),
-                      parallel = interactive(),
-                      n_cpus = detectCores() - 1,
-                      adaptive = FALSE,
-                      space_y = TRUE,
-                      number_y = 10,
-                      geneset){
+                    X,
+                    Z = NULL,
+                    test = c("asymptotic","permutation"),
+                    n_perm = 100,
+                    n_perm_adaptive = c(n_perm, n_perm, n_perm*3, n_perm*5),
+                    thresholds = c(0.1,0.05,0.01),
+                    parallel = interactive(),
+                    n_cpus = detectCores() - 1,
+                    adaptive = FALSE,
+                    space_y = TRUE,
+                    number_y = 10,
+                    geneset){
 
   # checks
   if(is.matrix(M)){
@@ -103,10 +103,10 @@ cit_gsa <- function(M,
   stopifnot(is.logical(parallel))
   stopifnot(is.logical(adaptive))
   stopifnot(is.numeric(n_perm))
-
+  
 
   M_colnames <- colnames(M)
-
+  
 
   if (sum(is.na(M)) > 1) {
     warning("'M' contains", sum(is.na(M)), "NA values. ",
@@ -279,7 +279,7 @@ cit_gsa <- function(M,
   } else if (test=="asymptotic"){
     ## asymptotic ----
     n_perm <- NA
-
+    
     # Data formatting in list format +  check column names
     if (inherits(geneset,"GSA.genesets")) { 
       geneset <- geneset$genesets
@@ -302,8 +302,8 @@ cit_gsa <- function(M,
         stop("No column names in M match geneset identifiers")
       } 
 
-      } else if(is.vector(geneset)){
-        geneset <- list(geneset)
+    } else if(is.vector(geneset)){
+      geneset <- list(geneset)
     }
 
 
@@ -337,100 +337,101 @@ cit_gsa <- function(M,
       test_stat_gs <- NA
       prop_gs <- list()
       indi_pi_gs <- list()
-
+      
       for (i in 1:length(geneset[[k]])){ # each gene in the gene set 
 
-        if (geneset[[k]][i] == M_colnames[i]){ # check if the name in the gs is not in the tab M
+        if (geneset[[k]][i] %in% M_colnames){ # check if the name in the gs is not in the tab M
+
+          Y <- M[,geneset[[k]][i]]
+
+
+          # 1) Test statistic computation ----
+          if (space_y){
+            y <- seq(from = ifelse(length(which(Y==0))==0, min(Y), min(Y[-which(Y==0)])),
+                     to = max(Y[-which.max(as.matrix(Y))]), length.out = number_y)
+          } else{
+            y <- sort(unique(Y))
+          }
+          p <- length(y)
+
+          index_jumps <- sapply(y[-p], function(i){sum(Y <= i)}) 
+          beta <- c(apply(X = H[, order(Y), drop=FALSE], MARGIN = 1, FUN = cumsum)[index_jumps, ]) / n_Y_all # same number than thresholds
+          test_stat <- sum(beta^2) * n_Y_all
+
+          test_stat_gs[i] <- test_stat # test statistic for each genes in the gene set 
+
+
+          # 2) Pi computation ----
+          indi_pi <- matrix(NA, n_Y_all, (p-1)) 
+
+          for (j in 1:(p-1)){ 
+            indi_Y <- 1 * (Y <= y[j])
+            indi_pi[,j] <- indi_Y
+          }
+
+          indi_pi_gs[[i]] <- indi_pi
+          prop <- colMeans(indi_pi)
+          prop_gs[[i]] <- prop # prop for each genes in the gene set 
 
-        Y <- M[,geneset[[k]][i]]
-
-
-        # 1) Test statistic computation ----
-        if (space_y){
-          y <- seq(from = ifelse(length(which(Y==0))==0, min(Y), min(Y[-which(Y==0)])),
-                   to = max(Y[-which.max(as.matrix(Y))]), length.out = number_y)
-        } else{
-          y <- sort(unique(Y))
         }
-        p <- length(y)
+      } 
+
+      if(!is.na(test_stat_gs)){
+        test_stat_list[[k]] <- test_stat_gs
+        indi_pi_gs_tab <- do.call(cbind, indi_pi_gs)
+        prop_gs_vec <- unlist(prop_gs)
 
-        index_jumps <- sapply(y[-p], function(i){sum(Y <= i)}) 
-        beta <- c(apply(X = H[, order(Y), drop=FALSE], MARGIN = 1, FUN = cumsum)[index_jumps, ]) / n_Y_all # same number than thresholds
-        test_stat <- sum(beta^2) * n_Y_all
 
-        test_stat_gs[i] <- test_stat # test statistic for each genes in the gene set 
 
+        # 3) Sigma matrix creation ----
+        Sigma2 <- matrix(NA,length(prop_gs_vec)*nrow(H),length(prop_gs_vec)*nrow(H)) 
+        new_prop <- matrix(NA,length(prop_gs_vec),length(prop_gs_vec))
 
-        # 2) Pi computation ----
-        indi_pi <- matrix(NA, n_Y_all, (p-1)) 
-        for (j in 1:(p-1)){ 
-          indi_Y <- 1*(Y<=y[j])
-          indi_pi[,j] <- indi_Y
-        }
-        indi_pi_gs[[i]] <- indi_pi
-        prop <- colMeans(indi_pi)
-        prop_gs[[i]] <- prop # prop for each genes in the gene set 
 
-        } else {
-          test_stat_gs[i] = 0
-          indi_pi_gs[[i]] = 0
-          prop_gs[[i]] = 0
-        }
-    } 
-      test_stat_list[[k]] <- test_stat_gs
-
-      indi_pi_gs_tab <- do.call(cbind, indi_pi_gs)
-      prop_gs_vec <- unlist(prop_gs)
-
-
-
-      # 3) Sigma matrix creation ----
-      Sigma2 <- matrix(NA,length(prop_gs_vec)*nrow(H),length(prop_gs_vec)*nrow(H)) 
-      new_prop <- matrix(NA,length(prop_gs_vec),length(prop_gs_vec))
-
-
-      if (nrow(H)>1){ # > 1 condition X
-
-        for (i in 1:nrow(new_prop)){  # new prop/new pi computation = the one of the gene set, here it's a matrix 
-          for(j in 1:ncol(new_prop)){
-
-            new_prop[i,j] <- mean((indi_pi_gs_tab[,i]-prop_gs_vec[i]) * (indi_pi_gs_tab[,j]-prop_gs_vec[j])) + prop_gs_vec[i] * prop_gs_vec[j] 
-
+        if (nrow(H)>1){ # > 1 condition X
+
+          for (i in 1:nrow(new_prop)){  # new prop/new pi computation = the one of the gene set, here it's a matrix 
+            for(j in 1:ncol(new_prop)){
+
+              new_prop[i,j] <- mean((indi_pi_gs_tab[,i]-prop_gs_vec[i]) * (indi_pi_gs_tab[,j]-prop_gs_vec[j])) + prop_gs_vec[i] * prop_gs_vec[j] 
+
+            }
+          }
+
+          Sigma2 <- 1/n * tcrossprod(H) %x%  (new_prop - prop_gs_vec %x%  t(prop_gs_vec))
+
+
+        } else { # 1 condition  
+
+          for (i in 1:nrow(Sigma2)) {  
+            for (j in 1:ncol(Sigma2)) {
+
+              new_prop <- mean((indi_pi_gs_tab[,i]-prop_gs_vec[i]) * (indi_pi_gs_tab[,j]-prop_gs_vec[j])) + prop_gs_vec[i] * prop_gs_vec[j] 
+
+              Sigma2[i, j] <- 1/n * tcrossprod(H) * (new_prop - prop_gs_vec[i] * prop_gs_vec[j])
+            }
           }
         }
 
-        Sigma2 <- 1/n * tcrossprod(H) %x%  (new_prop - prop_gs_vec %x%  t(prop_gs_vec))
+        Sigma2_list[[k]] <- Sigma2
 
+        decomp_list[[k]] <- eigen(Sigma2_list[[k]], symmetric=TRUE, only.values=TRUE)
 
-      } else { # 1 condition  
+        pval[k] <- survey::pchisqsum(sum(test_stat_list[[k]]), lower.tail = FALSE, df = rep(1, ncol(Sigma2_list[[k]])),a =decomp_list[[k]]$values , method = "saddlepoint") 
 
-        for (i in 1:nrow(Sigma2)) {  
-          for (j in 1:ncol(Sigma2)) {
-
-            new_prop <- mean((indi_pi_gs_tab[,i]-prop_gs_vec[i]) * (indi_pi_gs_tab[,j]-prop_gs_vec[j])) + prop_gs_vec[i] * prop_gs_vec[j] 
-
-            Sigma2[i, j] <- 1/n * tcrossprod(H) * (new_prop - prop_gs_vec[i] * prop_gs_vec[j])
-          }
-        }
+      }else{
+        warning("0 genes from geneset ", k, " observed in expression data")
+        pval[k] <- NA
+        test_stat_list[[k]] <- NA
       }
-
-      Sigma2_list[[k]] <- Sigma2
-
-      decomp_list[[k]] <- eigen(Sigma2_list[[k]], symmetric=TRUE, only.values=TRUE)
-
-      pval[k] <- survey::pchisqsum(sum(test_stat_list[[k]]), lower.tail = FALSE, df = rep(1, ncol(Sigma2_list[[k]])),a =decomp_list[[k]]$values , method = "saddlepoint") 
-
     }
-
-    df <- data.frame(raw_pval=pval,
-                     adj_pval =p.adjust(pval, method = "BH"),
-                     test_statistic = unlist(lapply(test_stat_list,sum)))
-
-
-
-    } 
-
 
+    df <- data.frame(raw_pval = pval,
+                     adj_pval = p.adjust(pval, method="BH"),
+                     test_statistic = unlist(lapply(test_stat_list, sum, na.rm=TRUE)))
+  } 
+
+
   #rownames(df) <- M_colnames
 
   output <- list(which_test = test,
@@ -439,7 +440,7 @@ cit_gsa <- function(M,
 
   class(output) <- "cit_gsa"
   return(output)
-
+  
 
 
 }