com in english

sistm · Oct 10, 2023 · ba18273 · ba18273
1 parent 9e27c4a
commit ba18273
Showing 1 changed file with 30 additions and 23 deletions.
diff --git a/R/cit_gsa.R b/R/cit_gsa.R
@@ -280,29 +280,34 @@ cit_gsa <- function(M,
     ## asymptotic ----
     n_perm <- NA
 
-    # mise en forme des données : liste + vérif noms colonnes
-    if (inherits(geneset,"GSA.genesets")) {
+    # Data formatting in list format +  check column names
+    if (inherits(geneset,"GSA.genesets")) { 
       geneset <- geneset$genesets
-      # Stop si aucun id des colonnes de M  ne match avec les id des gene set
-      if (all(M_colnames != unique(unlist(geneset)))){ # si vrai affiche msg
+
+      # Stop if no M column ids match gene set ids
+      if (all(M_colnames != unique(unlist(geneset)))){ # if true, displays message
         stop("No column names in M match geneset identifiers")
-      } # si 1 colonne à le même nom pas d'erreur, mais si identifiant numéros va mettre erreur docn NON !!
+      } 
+
     } else if (inherits(geneset,"BiocSet")){
       geneset<- BiocSet::es_elementset(geneset)
       geneset <- lapply(X  = unique(geneset$set),
                         FUN = function(x){
                           geneset[geneset$set == x,]$element
                         }
       )
-      # Stop si aucun id des colonnes de M  ne match avec les id des gene set
-      if (all(M_colnames != unique(unlist(geneset)))){ # si vrai affiche msg
+
+      # Stop if no M column ids match gene set ids
+      if (all(M_colnames != unique(unlist(geneset)))){ # if true, displays message
         stop("No column names in M match geneset identifiers")
-      } # si 1 colonne à le même nom pas d'erreur, mais si identifiant numéros va mettre erreur docn NON !!
-    } else if(is.vector(geneset)){
-      geneset <- list(geneset)
+      } 
+
+      } else if(is.vector(geneset)){
+        geneset <- list(geneset)
     }
 
 
+
     if (is.null(Z)){ 
       colnames(X) <- sapply(1:ncol(X), function(i){paste0('X',i)})
       modelmat <- as.matrix(model.matrix(~.,data=X))
@@ -314,28 +319,30 @@ cit_gsa <- function(M,
 
     indexes_X <- which(substring(colnames(modelmat), 1, 1) == "X")
 
-    # initialisation pour chaque gs du gmt/biocset
+
+    # Initialisation for each gene set 
     test_stat_list <- list()
     Sigma2_list <- list()
     decomp_list <- list()
     pval <- NA
 
 
-    for (k in 1:length(geneset)){ # chaque liste de gs du gmt/biocset
+    for (k in 1:length(geneset)){ # each list of gene set
 
-      # initialisation pour chaque gene du gs
+      # Initialisation for each gene in the gene set
       test_stat_gs <- NA
       prop_gs <- list()
       indi_pi_gs <- list()
 
-      for (i in 1:length(geneset[[k]])){ # chaque gène du gs
+      for (i in 1:length(geneset[[k]])){ # each gene in the gene set 
 
         Y <- M[,geneset[[k]][i]]
 
         n_Y_all <- length(Y)
-        H <- n_Y_all*(solve(crossprod(modelmat)) %*% t(modelmat))[indexes_X, , drop=FALSE] # taille de Y , même pour chaque gène puisque X et Y ne changent pas
+        H <- n_Y_all*(solve(crossprod(modelmat)) %*% t(modelmat))[indexes_X, , drop=FALSE] 
+        # length of Y, same for each genes because X and Y are the same 
 
-        # 1) calcule de la stat de test ----
+        # 1) Test statistic computation ----
         if (space_y){
           y <- seq(from = ifelse(length(which(Y==0))==0, min(Y), min(Y[-which(Y==0)])),
                    to = max(Y[-which.max(as.matrix(Y))]), length.out = number_y)
@@ -345,21 +352,21 @@ cit_gsa <- function(M,
         p <- length(y)
 
         index_jumps <- sapply(y[-p], function(i){sum(Y <= i)}) 
-        beta <- c(apply(X = H[, order(Y), drop=FALSE], MARGIN = 1, FUN = cumsum)[index_jumps, ]) / n_Y_all # même nb que seuil
+        beta <- c(apply(X = H[, order(Y), drop=FALSE], MARGIN = 1, FUN = cumsum)[index_jumps, ]) / n_Y_all # same number than thresholds
         test_stat <- sum(beta^2) * n_Y_all
 
-        test_stat_gs[i] <- test_stat # stat de test pour chaque gène du gs
+        test_stat_gs[i] <- test_stat # test statistic for each genes in the gene set 
 
 
-        # 2) calcule de pi ----
+        # 2) Pi computation ----
         indi_pi <- matrix(NA, n_Y_all, (p-1)) 
         for (j in 1:(p-1)){ 
           indi_Y <- 1*(Y<=y[j])
           indi_pi[,j] <- indi_Y
         }
         indi_pi_gs[[i]] <- indi_pi
         prop <- colMeans(indi_pi)
-        prop_gs[[i]] <- prop # prop pour chaque gènes du gs
+        prop_gs[[i]] <- prop # prop for each genes in the gene set 
 
       } 
       test_stat_list[[k]] <- test_stat_gs
@@ -368,13 +375,13 @@ cit_gsa <- function(M,
       prop_gs_vec <- unlist(prop_gs)
 
 
-      # 3) création de la matrice Sigma ----
+      # 3) Sigma matrix creation ----
       Sigma2 <- matrix(NA,length(prop_gs_vec)*nrow(H),length(prop_gs_vec)*nrow(H)) 
       new_prop <- matrix(NA,length(prop_gs_vec),length(prop_gs_vec))
 
-      if (nrow(H)>1){ # > 1 conditions
+      if (nrow(H)>1){ # > 1 condition X
 
-        for (i in 1:nrow(new_prop)){  #  calcule de la nouvelle proportion/du nouveau pi = celle du gene set : ici une matrice
+        for (i in 1:nrow(new_prop)){  # new prop computation/new pi computation = the one of the gene set, here it's a matrix 
           for(j in 1:ncol(new_prop)){
 
             new_prop[i,j] <- mean((indi_pi_gs_tab[,i]-prop_gs_vec[i]) * (indi_pi_gs_tab[,j]-prop_gs_vec[j])) + prop_gs_vec[i] * prop_gs_vec[j]