Skip to content

Commit

Permalink
Merge pull request #55 from BaderLab/altSparsify
Browse files Browse the repository at this point in the history
New functionality and example data
  • Loading branch information
shraddhapai authored Jun 26, 2018
2 parents bd6baa2 + 4dfc859 commit e709b2f
Show file tree
Hide file tree
Showing 328 changed files with 27,060 additions and 616 deletions.
68 changes: 68 additions & 0 deletions misc/Asthma_PBMC/Asthma_plotResults.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#' plot BRCA results
rm(list=ls())
require(netDx)
require(netDx.examples)
require(GEOquery)
require(org.Hs.eg.db)

rootDir <- "/home/shraddhapai/BaderLab/2018_AsthmaPBMC"
inDir <- sprintf("%s/input",rootDir)
datDir <- sprintf("%s/output/basic_180220/pred",rootDir)
outDir <- sprintf("%s/output/basic_180220/plot",rootDir)
pathFile <-sprintf("%s/anno/Human_AllPathways_November_01_2017_symbol.gmt",
rootDir)

dat <- getGEO(filename=sprintf("%s/GSE40732_series_matrix.txt.gz",inDir),
GSEMatrix=TRUE)
xpr <- exprs(dat)
pheno <- pData(dat)
# map GB ID to symbol
x <- mapIds(org.Hs.eg.db, keys=rownames(xpr),
column="SYMBOL",keytype="ACCNUM",
multiVals="first")
common <- intersect(names(x),rownames(xpr))
xpr <- xpr[which(rownames(xpr) %in% common),]
x <- x[which(names(x) %in% common)]

midx <- match(rownames(xpr),names(x))
gnames <- x[midx]
agg <- aggregate(xpr, by=list(gene_name=gnames),FUN=mean)
xpr <- agg[,-1]
rownames(xpr) <- agg[,1]

pheno <- pheno[,c("geo_accession","characteristics_ch1")]
st <- rep(NA, nrow(pheno))
st[which(pheno[,2] %in% "asthma: FALSE")] <- "control"
st[which(pheno[,2] %in% "asthma: TRUE")] <- "asthma"
pheno[,2] <- st
colnames(pheno) <- c("ID","STATUS")
pheno[,1] <- as.character(pheno[,1])

pathwayList <- readPathways(pathFile)
head(pathwayList)

if (!file.exists(outDir)) dir.create(outDir)
predClasses <- unique(pheno$STATUS)
postscript(sprintf("%s/perf.eps",outDir))
#predDir <- sprintf("%s/rng%i/predictionResults.txt",
# datDir,1:11)
predPerf <- plotPerf(datDir, predClasses=predClasses)
dev.off()
auroc <- unlist(lapply(predPerf, function(x) x$auroc))
aupr <- unlist(lapply(predPerf, function(x) x$aupr))
acc <- unlist(lapply(predPerf, function(x) x$accuracy))

###featList <- list(
### control=sprintf("%s/rng%i/control/GM_results/control_pathway_CV_score.txt",
### datDir,1:11),
### asthma=sprintf("%s/rng%i/asthma/GM_results/asthma_pathway_CV_score.txt",
### datDir,1:11))
featScores <- getFeatureScores(datDir,predClasses=predClasses)
featSelNet <- lapply(featScores, function(x) {
callFeatSel(x, fsCutoff=10, fsPctPass=0.9)
})

netInfoFile <- sprintf("%s/inputNets.txt",datDir)
netInfo <- read.delim(netInfoFile,sep="\t",h=FALSE,as.is=TRUE)
EMap_input <- writeEMapInput_many(featScores,pathwayList,
netInfo,outDir=outDir)
69 changes: 69 additions & 0 deletions misc/Asthma_PBMC/netDx.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Ependymoma
rm(list=ls())

require(GEOquery)
require(netDx)
require(netDx.examples)
require(org.Hs.eg.db)

rootDir <- "/home/shraddhapai/BaderLab/2018_AsthmaPBMC"
inDir <- sprintf("%s/input",rootDir)
outDir <- sprintf("%s/output",rootDir)
pathFile <-sprintf("%s/anno/Human_AllPathways_February_01_2018_symbol.gmt",
rootDir)

dat <- getGEO(filename=sprintf("%s/GSE40732_series_matrix.txt.gz",inDir),
GSEMatrix=TRUE)
xpr <- exprs(dat)
pheno <- pData(dat)
# map GB ID to symbol
x <- mapIds(org.Hs.eg.db, keys=rownames(xpr), column="SYMBOL",keytype="ACCNUM",
multiVals="first")
common <- intersect(names(x),rownames(xpr))
xpr <- xpr[which(rownames(xpr) %in% common),]
x <- x[which(names(x) %in% common)]

midx <- match(rownames(xpr),names(x))
gnames <- x[midx]
agg <- aggregate(xpr, by=list(gene_name=gnames),FUN=mean)
xpr <- agg[,-1]
rownames(xpr) <- agg[,1]

pheno <- pheno[,c("geo_accession","characteristics_ch1")]
st <- rep(NA, nrow(pheno))
st[which(pheno[,2] %in% "asthma: FALSE")] <- "control"
st[which(pheno[,2] %in% "asthma: TRUE")] <- "asthma"
pheno[,2] <- st
colnames(pheno) <- c("ID","STATUS")
pheno[,1] <- as.character(pheno[,1])

pathwayList <- readPathways(pathFile)
head(pathwayList)

makeNets <- function(dataList, groupList, netDir,...) {
netList <- c()
# make RNA nets: group by pathway
if (!is.null(groupList[["rna"]])) {
netList <- makePSN_NamedMatrix(dataList$rna,
rownames(dataList$rna),
groupList[["rna"]],netDir,verbose=FALSE,
writeProfiles=TRUE,...)
netList <- unlist(netList)
cat(sprintf("Made %i RNA pathway nets\n", length(netList)))
}
cat(sprintf("Total of %i nets\n", length(netList)))
return(netList)
}

dt <- format(Sys.Date(),"%y%m%d")
megaDir <- sprintf("%s/basic_%s",outDir,dt)
if (!file.exists(megaDir)) dir.create(megaDir)

gps <- list(rna=pathwayList)
dats <- list(rna=xpr)

runPredictor_nestedCV(pheno,
dataList=dats,groupList=gps,
makeNetFunc=makeNets, ### custom network creation function
outDir=sprintf("%s/pred",megaDir),
numCores=4L,nFoldCV=10L, CVcutoff=9L,numSplits=100L,CVmemory=13L)
17 changes: 9 additions & 8 deletions misc/BRCA/BRCA_example.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@ subtypes<- c("LumA")
pheno$STATUS[which(!pheno$STATUS %in% subtypes)] <- "other"
subtypes <- c(subtypes,"other") # add residual

pathFile <- sprintf("%s/extdata/Human_160124_AllPathways.gmt",
path.package("netDx.examples"))
pathwayList <- readPathways(pathFile)
head(pathwayList)


BRCA_makeNets <- function(dataList, groupList, netDir,...) {
netList <- c()
Expand All @@ -29,9 +24,14 @@ BRCA_makeNets <- function(dataList, groupList, netDir,...) {
return(netList)
}

rootDir <- "/home/shraddhapai/BaderLab/2017_BRCA/output/"
rootDir <- "/home/shraddhapai/BaderLab/2017_BRCA"
dt <- format(Sys.Date(),"%y%m%d")
megaDir <- sprintf("%s/BRCA_%s",rootDir,dt)
megaDir <- sprintf("%s/output/BRCA_part2_%s",rootDir,dt)

pathFile <- sprintf("%s/anno/Human_AllPathways_February_01_2018_symbol.gmt",
rootDir)
pathwayList <- readPathways(pathFile)
head(pathwayList)

gps <- list(rna=pathwayList)
dats <- list(rna=xpr)
Expand All @@ -40,4 +40,5 @@ runPredictor_nestedCV(pheno,
dataList=dats,groupList=gps,
makeNetFunc=BRCA_makeNets, ### custom network creation function
outDir=megaDir,
numCores=10L,nFoldCV=10L, CVcutoff=9L,numSplits=25L)
numCores=8L,nFoldCV=10L, CVcutoff=9L,numSplits=100L,CVmemory=13L,
startAt=3L)
File renamed without changes.
36 changes: 36 additions & 0 deletions misc/BRCA/BRCA_oneNet_plotResults.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#' plot BRCA results

require(netDx)
require(netDx.examples)
data(TCGA_BRCA)

rootDir <- "/home/shraddhapai/BaderLab/2017_BRCA/output"

pheno$STATUS[which(pheno$STATUS!="LumA")] <- "other"

# get one rna net perf
inDir <- sprintf("%s/BRCA_OneRNAnet_180221/.",rootDir)
outDir <- sprintf("%s/BRCA_OneRNAnet_180221/plot",rootDir)
predClasses <- unique(pheno$STATUS)
postscript(sprintf("%s/perf.eps",outDir))
predPerf_oneRNA <- plotPerf(inDir, predClasses=predClasses)
dev.off()

inDir_path <- sprintf("%s/BRCA_part2_180223", rootDir)
outDir <- sprintf("%s/BRCA_part2_180223/plot",rootDir)
postscript(sprintf("%s/perf.eps",outDir))
predPerf_path <- plotPerf(inDir_path, predClasses=predClasses)
dev.off()

auc_onerna <- unlist(lapply(predPerf_oneRNA,function(x) x$auroc))
auc_path <- unlist(lapply(predPerf_path,function(x) x$auroc))
wmw <- wilcox.test(auc_onerna,auc_path)
pdf("BRCA_oneNetVsPath.pdf")
boxplot(list(OneNet=auc_onerna, Pathways=auc_path),
main=sprintf("BRCA, one vs path\n(WMW p < %1.2e)",
wmw$p.value))
dev.off()
cat(sprintf("One RNA: AUC=%1.2f +/- %1.2f\n",mean(auc_onerna),sd(auc_onerna)))
cat(sprintf("Pathways: AUC=%1.2f +/- %1.2f\n",mean(auc_path),sd(auc_path)))


40 changes: 40 additions & 0 deletions misc/BRCA/BRCA_oneRNAmat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# BRCA example with nested cv

require(netDx)
require(netDx.examples)
data(TCGA_BRCA)

subtypes<- c("LumA")
pheno$STATUS[which(!pheno$STATUS %in% subtypes)] <- "other"
subtypes <- c(subtypes,"other") # add residual


BRCA_makeNets <- function(dataList, groupList, netDir,...) {
netList <- c()
# make RNA nets: group by pathway
if (!is.null(groupList[["rna"]])) {
netList <- makePSN_NamedMatrix(dataList$rna,
rownames(dataList$rna),
groupList[["rna"]],netDir,verbose=FALSE,
writeProfiles=TRUE,...)
netList <- unlist(netList)
cat(sprintf("Made %i RNA pathway nets\n", length(netList)))
}
cat(sprintf("Total of %i nets\n", length(netList)))
return(netList)
}

rootDir <- "/home/shraddhapai/BaderLab/2017_BRCA"
dt <- format(Sys.Date(),"%y%m%d")
megaDir <- sprintf("%s/output/BRCA_OneRNAnet_%s",rootDir,dt)

geneSet <- list(allrna=rownames(xpr))

gps <- list(rna=geneSet)
dats <- list(rna=xpr)

runPredictor_nestedCV(pheno,
dataList=dats,groupList=gps,
makeNetFunc=BRCA_makeNets, ### custom network creation function
outDir=megaDir,
numCores=4L,nFoldCV=10L, CVcutoff=9L,numSplits=100L)
36 changes: 30 additions & 6 deletions misc/BRCA/BRCA_plotResults.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ require(netDx)
require(netDx.examples)
data(TCGA_BRCA)

rootDir <- "/Users/shraddhapai/Dropbox/netDx/BaderLab/2017_BRCA/output/BRCA_180117"
rootDir <- "/home/shraddhapai/BaderLab/2017_BRCA"

pathFile <- sprintf("%s/extdata/Human_160124_AllPathways.gmt",
path.package("netDx.examples"))
pathFile <- sprintf("%s/anno/Human_AllPathways_February_01_2018_symbol.gmt",
rootDir)
pathwayList <- readPathways(pathFile)
head(pathwayList)

xpr_genes <- rownames(xpr)
pathwayList <- lapply(pathwayList,function(x) x[which(x %in% xpr_genes)])
Expand All @@ -19,12 +20,21 @@ pheno$STATUS[which(pheno$STATUS!="LumA")] <- "other"
# outDir=sprintf("%s/plot",rootDir),
# fsCutoff=10,fsPctPass=0.7,pathwaySet=pathwayList)

inDir <- sprintf("%s/pred",rootDir)
outDir <- sprintf("%s/plot",rootDir)
inDir <- sprintf("%s/output/BRCA_part2_180223",rootDir)
outDir <- sprintf("%s/output/BRCA_part2_180223/plot",rootDir)
if (!file.exists(outDir)) dir.create(outDir)
predClasses <- unique(pheno$STATUS)
postscript(sprintf("%s/perf.eps",outDir))

#predFiles <- sprintf("%s/rng%i/predictionResults.txt", inDir,1:79)
predPerf <- plotPerf(inDir, predClasses=predClasses)
dev.off()

featFiles <- list(
LumA=sprintf("%s/rng%i/LumA/GM_results/LumA_pathway_CV_score.txt", inDir,1:79),
other=sprintf("%s/rng%i/other/GM_results/other_pathway_CV_score.txt", inDir,1:79)
)
#featScores <- getFeatureScores(featFiles,predClasses=predClasses)
featScores <- getFeatureScores(inDir,predClasses=predClasses)
featSelNet <- lapply(featScores, function(x) {
callFeatSel(x, fsCutoff=10, fsPctPass=0.7)
Expand All @@ -33,4 +43,18 @@ featSelNet <- lapply(featScores, function(x) {
netInfoFile <- sprintf("%s/inputNets.txt",inDir)
netInfo <- read.delim(netInfoFile,sep="\t",h=FALSE,as.is=TRUE)
EMap_input <- writeEMapInput_many(featScores,pathwayList,
netInfo,outDir=outDir)
netInfo,outDir=outDir,pctPass=0.7)

auroc <- unlist(lapply(predPerf,function(x) x$auroc))
aupr <- unlist(lapply(predPerf,function(x) x$aupr))
acc <- unlist(lapply(predPerf,function(x) x$accuracy))
cat(sprintf("mean auroc = %1.2f ; aupr = %1.2f ; acc = %1.2f%%",
mean(auroc), mean(aupr), mean(acc)))

cat("Performance\n")
#cat(sprintf("AUROC = %1.2f +/- %1.2f\n", mean(auroc),sd(auroc)/sqrt(length(auroc))))
#cat(sprintf("AUPR = %1.2f +/- %1.2f\n", mean(aupr),sd(aupr)/sqrt(length(aupr))))
#cat(sprintf("ACC = %1.2f +/- %1.2f\n", mean(acc),sd(acc)/sqrt(length(acc))))
cat(sprintf("AUROC = %1.2f +/- %1.2f\n", mean(auroc),sd(auroc)))
cat(sprintf("AUPR = %1.2f +/- %1.2f\n", mean(aupr),sd(aupr)))
cat(sprintf("ACC = %1.2f +/- %1.2f\n", mean(acc),sd(acc)))
66 changes: 66 additions & 0 deletions misc/Ependymoma/Epen_plotDNAm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#' plot BRCA results
rm(list=ls())
require(netDx)
require(netDx.examples)
#rootDir <- "/Users/shraddhapai/Dropbox/netDx/BaderLab/2017_Ependymoma"
rootDir <- "/home/shraddhapai/BaderLab/2017_Ependymoma"

phenoFile <- "/home/shraddhapai/BaderLab/2018_Epen_DNAm/input/GSE90496_pData.txt"
pheno <- read.delim(phenoFile,sep="\t",h=T,as.is=T)
ttype <- pheno$characteristics_ch1

inFile <- sprintf("%s/input/netDx_prepared/Ependymoma_cohortMerged_180125.Rdata",rootDir)
load(inFile)
# ----------------------
# input processing
pheno <- read.delim(phenoFile,sep="\t",h=T,as.is=T)
ttype <- pheno$characteristics_ch1
idx <- which(ttype %in% c("methylation class: EPN, PF A","methylation class: EPN, PF B"))
cat(sprintf("Got %i samples\n",length(idx)))
pheno <- pheno[idx,] # limit to EPN samples
cpos <- regexpr("sample", pheno$title)
bpos <- regexpr("\\[reference", pheno$title)
str <- as.integer(substr(pheno$title, cpos+7, bpos-2)) # get sample number
pheno$ID <- paste("SAMPLE", str,sep=".")
pheno <- pheno[,c("ID","characteristics_ch1")]
st <- rep("",nrow(pheno))
st[grep("PF A", pheno[,2])] <- "PFA"
st[grep("PF B", pheno[,2])] <- "PFB"
pheno$STATUS <- st

#out <- plotAllResults(pheno, sprintf("%s/pred",rootDir),
# outDir=sprintf("%s/plot",rootDir),
# fsCutoff=10,fsPctPass=0.7,pathwaySet=pathwayList)

#setName <- "Epen_prunedOneNet_0.001_180409"
#setName <- "Epen_prunedPathway_0.01_180409"
setName <- "Epen_prunedOneNet_0.001_180410"
inDir <- sprintf("%s/output/%s/pred",rootDir,setName)
outDir <- sprintf("%s/output/%s/plot",rootDir,setName)

if (!file.exists(outDir)) dir.create(outDir)
predClasses <- unique(pheno$STATUS)
postscript(sprintf("%s/perf.eps",outDir))
predPerf <- plotPerf(inDir, predClasses=predClasses)
dev.off()
auroc <- unlist(lapply(predPerf, function(x) x$auroc*100))
aupr <- unlist(lapply(predPerf, function(x) x$aupr*100))
acc <- unlist(lapply(predPerf, function(x) x$accuracy))

cat("--------------\n")
cat(sprintf("Performance: %s\n",setName))
cat(sprintf("AUROC = %1.2f +/- %1.2f\n",mean(auroc),sd(auroc)))
cat(sprintf("AUPR = %1.2f +/- %1.2f\n",mean(aupr),sd(aupr)))
cat(sprintf("Accuracy = %1.2f +/- %1.2f\n",mean(acc),sd(acc)))
cat("--------------\n")


###featScores <- getFeatureScores(inDir,predClasses=predClasses)
###featSelNet <- lapply(featScores, function(x) {
### callFeatSel(x, fsCutoff=10, fsPctPass=0.9)
###})
###
###netInfoFile <- sprintf("%s/inputNets.txt",inDir)
###netInfo <- read.delim(netInfoFile,sep="\t",h=FALSE,as.is=TRUE)
###EMap_input <- writeEMapInput_many(featScores,pathwayList,
### netInfo,outDir=outDir)
Loading

0 comments on commit e709b2f

Please sign in to comment.