Skip to content

Commit

Permalink
NA works ? to be tested
Browse files Browse the repository at this point in the history
  • Loading branch information
borishejblum committed Oct 24, 2023
1 parent 0dad5df commit 60752dc
Showing 1 changed file with 95 additions and 94 deletions.
189 changes: 95 additions & 94 deletions R/cit_gsa.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,18 @@
#' @examples
#' #TO DO
cit_gsa <- function(M,
X,
Z = NULL,
test = c("asymptotic","permutation"),
n_perm = 100,
n_perm_adaptive = c(n_perm, n_perm, n_perm*3, n_perm*5),
thresholds = c(0.1,0.05,0.01),
parallel = interactive(),
n_cpus = detectCores() - 1,
adaptive = FALSE,
space_y = TRUE,
number_y = 10,
geneset){
X,
Z = NULL,
test = c("asymptotic","permutation"),
n_perm = 100,
n_perm_adaptive = c(n_perm, n_perm, n_perm*3, n_perm*5),
thresholds = c(0.1,0.05,0.01),
parallel = interactive(),
n_cpus = detectCores() - 1,
adaptive = FALSE,
space_y = TRUE,
number_y = 10,
geneset){

# checks
if(is.matrix(M)){
Expand All @@ -103,10 +103,10 @@ cit_gsa <- function(M,
stopifnot(is.logical(parallel))
stopifnot(is.logical(adaptive))
stopifnot(is.numeric(n_perm))


M_colnames <- colnames(M)


if (sum(is.na(M)) > 1) {
warning("'M' contains", sum(is.na(M)), "NA values. ",
Expand Down Expand Up @@ -279,7 +279,7 @@ cit_gsa <- function(M,
} else if (test=="asymptotic"){
## asymptotic ----
n_perm <- NA

# Data formatting in list format + check column names
if (inherits(geneset,"GSA.genesets")) {
geneset <- geneset$genesets
Expand All @@ -302,8 +302,8 @@ cit_gsa <- function(M,
stop("No column names in M match geneset identifiers")
}

} else if(is.vector(geneset)){
geneset <- list(geneset)
} else if(is.vector(geneset)){
geneset <- list(geneset)
}


Expand Down Expand Up @@ -337,100 +337,101 @@ cit_gsa <- function(M,
test_stat_gs <- NA
prop_gs <- list()
indi_pi_gs <- list()

for (i in 1:length(geneset[[k]])){ # each gene in the gene set

if (geneset[[k]][i] == M_colnames[i]){ # check if the name in the gs is not in the tab M
if (geneset[[k]][i] %in% M_colnames){ # check if the name in the gs is not in the tab M

Y <- M[,geneset[[k]][i]]


# 1) Test statistic computation ----
if (space_y){
y <- seq(from = ifelse(length(which(Y==0))==0, min(Y), min(Y[-which(Y==0)])),
to = max(Y[-which.max(as.matrix(Y))]), length.out = number_y)
} else{
y <- sort(unique(Y))
}
p <- length(y)

index_jumps <- sapply(y[-p], function(i){sum(Y <= i)})
beta <- c(apply(X = H[, order(Y), drop=FALSE], MARGIN = 1, FUN = cumsum)[index_jumps, ]) / n_Y_all # same number than thresholds
test_stat <- sum(beta^2) * n_Y_all

test_stat_gs[i] <- test_stat # test statistic for each genes in the gene set


# 2) Pi computation ----
indi_pi <- matrix(NA, n_Y_all, (p-1))

for (j in 1:(p-1)){
indi_Y <- 1 * (Y <= y[j])
indi_pi[,j] <- indi_Y
}

indi_pi_gs[[i]] <- indi_pi
prop <- colMeans(indi_pi)
prop_gs[[i]] <- prop # prop for each genes in the gene set

Y <- M[,geneset[[k]][i]]


# 1) Test statistic computation ----
if (space_y){
y <- seq(from = ifelse(length(which(Y==0))==0, min(Y), min(Y[-which(Y==0)])),
to = max(Y[-which.max(as.matrix(Y))]), length.out = number_y)
} else{
y <- sort(unique(Y))
}
p <- length(y)
}

if(!is.na(test_stat_gs)){
test_stat_list[[k]] <- test_stat_gs
indi_pi_gs_tab <- do.call(cbind, indi_pi_gs)
prop_gs_vec <- unlist(prop_gs)

index_jumps <- sapply(y[-p], function(i){sum(Y <= i)})
beta <- c(apply(X = H[, order(Y), drop=FALSE], MARGIN = 1, FUN = cumsum)[index_jumps, ]) / n_Y_all # same number than thresholds
test_stat <- sum(beta^2) * n_Y_all

test_stat_gs[i] <- test_stat # test statistic for each genes in the gene set

# 3) Sigma matrix creation ----
Sigma2 <- matrix(NA,length(prop_gs_vec)*nrow(H),length(prop_gs_vec)*nrow(H))
new_prop <- matrix(NA,length(prop_gs_vec),length(prop_gs_vec))

# 2) Pi computation ----
indi_pi <- matrix(NA, n_Y_all, (p-1))
for (j in 1:(p-1)){
indi_Y <- 1*(Y<=y[j])
indi_pi[,j] <- indi_Y
}
indi_pi_gs[[i]] <- indi_pi
prop <- colMeans(indi_pi)
prop_gs[[i]] <- prop # prop for each genes in the gene set

} else {
test_stat_gs[i] = 0
indi_pi_gs[[i]] = 0
prop_gs[[i]] = 0
}
}
test_stat_list[[k]] <- test_stat_gs

indi_pi_gs_tab <- do.call(cbind, indi_pi_gs)
prop_gs_vec <- unlist(prop_gs)



# 3) Sigma matrix creation ----
Sigma2 <- matrix(NA,length(prop_gs_vec)*nrow(H),length(prop_gs_vec)*nrow(H))
new_prop <- matrix(NA,length(prop_gs_vec),length(prop_gs_vec))


if (nrow(H)>1){ # > 1 condition X

for (i in 1:nrow(new_prop)){ # new prop/new pi computation = the one of the gene set, here it's a matrix
for(j in 1:ncol(new_prop)){

new_prop[i,j] <- mean((indi_pi_gs_tab[,i]-prop_gs_vec[i]) * (indi_pi_gs_tab[,j]-prop_gs_vec[j])) + prop_gs_vec[i] * prop_gs_vec[j]

if (nrow(H)>1){ # > 1 condition X

for (i in 1:nrow(new_prop)){ # new prop/new pi computation = the one of the gene set, here it's a matrix
for(j in 1:ncol(new_prop)){

new_prop[i,j] <- mean((indi_pi_gs_tab[,i]-prop_gs_vec[i]) * (indi_pi_gs_tab[,j]-prop_gs_vec[j])) + prop_gs_vec[i] * prop_gs_vec[j]

}
}

Sigma2 <- 1/n * tcrossprod(H) %x% (new_prop - prop_gs_vec %x% t(prop_gs_vec))


} else { # 1 condition

for (i in 1:nrow(Sigma2)) {
for (j in 1:ncol(Sigma2)) {

new_prop <- mean((indi_pi_gs_tab[,i]-prop_gs_vec[i]) * (indi_pi_gs_tab[,j]-prop_gs_vec[j])) + prop_gs_vec[i] * prop_gs_vec[j]

Sigma2[i, j] <- 1/n * tcrossprod(H) * (new_prop - prop_gs_vec[i] * prop_gs_vec[j])
}
}
}

Sigma2 <- 1/n * tcrossprod(H) %x% (new_prop - prop_gs_vec %x% t(prop_gs_vec))
Sigma2_list[[k]] <- Sigma2

decomp_list[[k]] <- eigen(Sigma2_list[[k]], symmetric=TRUE, only.values=TRUE)

} else { # 1 condition
pval[k] <- survey::pchisqsum(sum(test_stat_list[[k]]), lower.tail = FALSE, df = rep(1, ncol(Sigma2_list[[k]])),a =decomp_list[[k]]$values , method = "saddlepoint")

for (i in 1:nrow(Sigma2)) {
for (j in 1:ncol(Sigma2)) {

new_prop <- mean((indi_pi_gs_tab[,i]-prop_gs_vec[i]) * (indi_pi_gs_tab[,j]-prop_gs_vec[j])) + prop_gs_vec[i] * prop_gs_vec[j]

Sigma2[i, j] <- 1/n * tcrossprod(H) * (new_prop - prop_gs_vec[i] * prop_gs_vec[j])
}
}
}else{
warning("0 genes from geneset ", k, " observed in expression data")
pval[k] <- NA
test_stat_list[[k]] <- NA
}

Sigma2_list[[k]] <- Sigma2

decomp_list[[k]] <- eigen(Sigma2_list[[k]], symmetric=TRUE, only.values=TRUE)

pval[k] <- survey::pchisqsum(sum(test_stat_list[[k]]), lower.tail = FALSE, df = rep(1, ncol(Sigma2_list[[k]])),a =decomp_list[[k]]$values , method = "saddlepoint")

}

df <- data.frame(raw_pval=pval,
adj_pval =p.adjust(pval, method = "BH"),
test_statistic = unlist(lapply(test_stat_list,sum)))



}


df <- data.frame(raw_pval = pval,
adj_pval = p.adjust(pval, method="BH"),
test_statistic = unlist(lapply(test_stat_list, sum, na.rm=TRUE)))
}


#rownames(df) <- M_colnames

output <- list(which_test = test,
Expand All @@ -439,7 +440,7 @@ cit_gsa <- function(M,

class(output) <- "cit_gsa"
return(output)



}
Expand Down

0 comments on commit 60752dc

Please sign in to comment.