-
Notifications
You must be signed in to change notification settings - Fork 0
/
deseq2_norm.R
75 lines (59 loc) · 2.74 KB
/
deseq2_norm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
##################################################################################
##### normalization using DESeq2 - get normalized and vst transformed counts #####
##################################################################################
### install DESeq2, tximport packages from Bioconductor
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager", repos='http://cran.us.r-project.org')
# BiocManager::install("DESeq2")
# BiocManager::install("tximport")
library(tidyverse)
library(DESeq2)
library(tximport)
args = commandArgs(trailingOnly=TRUE)
inputpath = args[1] # with path
outputFile = args[2] # with path
metadataPath = args[3]
data_path <- "../data/"
print(getwd())
setwd(data_path)
# loading metadata
metadata <- read_csv(metadataPath))
colnames(metadata)[1] <- "sampleID"
# loading rsem filelist
files <- list.files(path=inputpath, pattern="*.genes.results", recursive = TRUE, full.names = TRUE)
# create sample table
sampleTable <- data.frame(sampleID = str_extract(files, "\\d{5}\\w"), CellType = str_extract(files, "CD\\d{1,2}"), files = files) %>%
left_join(., metadata, by="sampleID")
row.names(sampleTable) <- paste(sampleTable$sampleID, sampleTable$CellType, sep=".")
# function for loading rsem counts and normalization
getNormCounts <- function(celltypes, files, sampleTable){
rsem.gene.sub <- tximport(files[grep(celltypes, files)], type = "rsem", txIn = FALSE, txOut = FALSE)
sampleTable.sub <- sampleTable %>% filter(files %in% files[grep(celltypes, files)]) %>%
arrange(match(files, files[grep(celltypes, files)]))
rsem.gene.sub$length[rsem.gene.sub$length == 0] <- 1
cds.sub <- DESeqDataSetFromTximport(txi=rsem.gene.sub, colData=sampleTable.sub, design=~1)
cds.sub <- cds.sub[ rowSums(counts(cds.sub)) > 0, ]
cds.sub <- DESeq(cds.sub)
return(cds.sub)
}
# run function
celltypes <- c("CD4","CD8","CD14")
deseq_obj <- list()
for (i in celltypes){
deseq_obj[[i]] <- getNormCounts(i, files, sampleTable)
}
### save count matrix
# save DESeq2 object
save(deseq_obj, file = "DESeq2_object.RData")
# normalized counts
write.csv(counts(deseq_obj$CD4, normalized = TRUE), "counts_norm_CD4.csv")
write.csv(counts(deseq_obj$CD8, normalized = TRUE), "counts_norm_CD8.csv")
write.csv(counts(deseq_obj$CD14, normalized = TRUE), "counts_norm_CD14.csv")
# vst(variance stabilization transformation) transformed counts
write.csv(assay(vst(deseq_obj$CD4)), "counts_vst_CD4.csv")
write.csv(assay(vst(deseq_obj$CD8)), "counts_vst_CD8.csv")
write.csv(assay(vst(deseq_obj$CD14)), "counts_vst_CD14.csv")
# rlog transformed counts
write.csv(assay(rlog(deseq_obj$CD4)), "counts_rlog_CD4.csv")
write.csv(assay(rlog(deseq_obj$CD8)), "counts_rlog_CD8.csv")
write.csv(assay(rlog(deseq_obj$CD14)), "counts_rlog_CD14.csv")