-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathID_DESeq2.R
89 lines (74 loc) · 2.67 KB
/
ID_DESeq2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env Rscript
#SBATCH --job-name="ID DESeq2"
#SBATCH --out=.slurmlogs/slurm.id_deseq2.%j_%N.out
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=10G
#SBATCH --time=5-00:00:00
#SBATCH --mail-type=ALL
#SBATCH [email protected]
# This script calculates the differntial experssion between
# tumor and non tumor samples within each cohort and in the
# degernate / dimensional reduced data with ncomponents.
if (!exists("ncomps")){
args = commandArgs(trailingOnly = TRUE)
if (length(args) < 1){
stop(paste0("No numer of components specified.\n",
"You can specify the number by providing the it behind ",
"--args when calling the script from the command line ",
"or setting the 'ncomps' variable in the R enviorment."))
}else{
ncomps = as.numeric(args[1])
}
}
ncomps = ceiling(ncomps)
if (!is.finite(ncomps) || ncomps < 1) {
stop("The number of components musst be an integer larger than 0.")
}
if (!exists("cohort") && length(args) > 1) {
cohort = args[2]
} else if (!exists("cohort")) {
cohort = NULL
}
message(paste("This is ncomps:", ncomps, "; for cohort: ", cohort))
suppressPackageStartupMessages(library(DESeq2))
inPath = "data/deg_counts_od"
outPath = "data/ID_DESeq2_od"
dir.create(outPath, showWarnings = FALSE, recursive = TRUE)
pheno = readRDS(paste0(inPath, "/pheno.RDS"))
counts = readRDS(paste0(inPath, "/counts.RDS"))
dg_path = paste0(inPath, "/", ncomps - 1, "-comp.RDS")
if (!file.exists(dg_path)) {
message(paste("The file", dg_path, "does not exist. Stopping..."))
quit()
}
raw_deg_counts = readRDS(dg_path)
deg_counts = data.frame(t(raw_deg_counts))
colnames(deg_counts) = colnames(counts)
rownames(deg_counts) = rownames(counts)
if (!is.null(cohort)) {
ind = pheno$Cohort == cohort
deg_counts = deg_counts[, ind]
pheno = pheno[ind, ]
}
broken = apply(is.na(deg_counts), 1, any)
if (any(broken)) {
message(paste0("There are ", sum(broken),
" features with NA values that are removed."))
deg_counts = deg_counts[!broken, ]
}
noPheno = is.na(pheno$cep)
if (any(noPheno)) {
message(paste0("There are ", sum(noPheno),
" samples with NA values that are removed."))
deg_counts = deg_counts[, !noPheno]
pheno = pheno[!noPheno, ]
}
dds_dat = DESeqDataSetFromMatrix(countData = round(deg_counts),
colData = pheno,
design = ~ cep)
dds = DESeq(dds_dat)
res = results(dds)
data = list(dds_data = dds_dat, dds = dds, dds_result = res)
saveRDS(data, paste0(outPath, "/DESeq2_results_", cohort, "_",
ncomps - 1, "-comp.RDS"))