-
Notifications
You must be signed in to change notification settings - Fork 0
/
mergeTN.R
executable file
·56 lines (43 loc) · 1.69 KB
/
mergeTN.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/opt/common/CentOS_6-dev/R/R-3.1.3/bin/Rscript
library(argparse)
library(data.table)
parser = ArgumentParser()
parser$add_argument('-t', '--tumor_counts', type='character', help='Tumor counts file name')
parser$add_argument('-n', '--normal_counts', type='character', help='Normal counts file name')
parser$add_argument('-o', '--outfile', type='character', help='output file (gzipped')
args=parser$parse_args()
TUMOR = args$tumor_counts
NORMAL = args$normal_counts
GZOUT = gzfile(args$outfile)
MINCOV_NORMAL=25
read_counts <- function(file){
dt = fread(paste0("gunzip --stdout ", file), showProgress=FALSE)
setkeyv(dt, c("Chrom", "Pos", "Ref", "Alt"))
dt[,Refidx := NULL]
dt[,TOTAL_depth := NULL]
dt[,MAPQ_depth := NULL]
dt[,ID := NULL]
dt[,INS := NULL]
dt[,DEL := NULL]
dt[Ref != "N" & nchar(Ref) == 1 & nchar(Alt) == 1]
}
write("Reading normal ...", stdout())
NORMAL_dt = read_counts(NORMAL)
NORMAL_dt = NORMAL_dt[BASEQ_depth >= MINCOV_NORMAL]
write("done ...", stderr())
write("Reading tumor ...", stderr())
TUMOR_dt = read_counts(TUMOR)
write("done ...", stderr())
mergeTN = merge(TUMOR_dt, NORMAL_dt, suffixes = c(".TUM", ".NOR"))
setnames(mergeTN,
c("Chrom", "Pos", "Ref", "Alt", "TUM.DP", "TUM.Ap", "TUM.Cp",
"TUM.Gp", "TUM.Tp", "TUM.An", "TUM.Cn", "TUM.Gn", "TUM.Tn", "NOR.DP",
"NOR.Ap", "NOR.Cp", "NOR.Gp", "NOR.Tp", "NOR.An", "NOR.Cn", "NOR.Gn",
"NOR.Tn"))
mergeTN[, Chrom := factor(Chrom, levels=c(c(1:22, "X", "Y", "MT"), paste0("chr", c(1:22, "X", "Y", "M"))))]
mergeTN = mergeTN[order(Chrom, Pos)]
write.table(mergeTN, file=GZOUT,
quote = F,
col.names = T,
row.names = F,
sep = "\t")