-
Notifications
You must be signed in to change notification settings - Fork 0
/
2.TPM-combine.R
31 lines (25 loc) · 1.08 KB
/
2.TPM-combine.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
library(tidyverse)
library(openxlsx)
#######################################################################
# Merge the TPM column of every per-sample table found in
#   <args[1]>/total.rdrp.megahit.fa_contigs/
# into a single wide table (one row per identifier, one column per sample)
# and write it to <args[1]>/RSEM.csv.
#
# Usage: Rscript 2.TPM-combine.R <project_dir>
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  stop("Usage: Rscript 2.TPM-combine.R <project_dir>", call. = FALSE)
}

contig_dir <- paste0(args[1], "/total.rdrp.megahit.fa_contigs/")
filelist <- list.files(contig_dir)
if (length(filelist) == 0) {
  # Fail early with a clear message instead of an obscure read.table() error.
  stop("No input tables found in ", contig_dir, call. = FALSE)
}
files <- paste0(contig_dir, filelist)

# Read one tab-separated table and reduce it to two columns: the identifier
# (first column) and the TPM values, renamed to the sample name.
#
# path       Full path of the table to read.
# file_name  Bare file name; the text before the first "_" becomes the
#            name of the TPM column.
# Returns a data.frame with one row per unique identifier (the first
# occurrence wins when an identifier is repeated).
read_sample_tpm <- function(path, file_name) {
  tbl <- read.table(path, header = TRUE, sep = "\t")
  # Disabled sanity check kept from the original script:
  # all(tbl$gene_id == tbl$Name)
  tpm_col <- which(colnames(tbl) %in% "TPM")
  if (length(tpm_col) == 0) {
    stop("No 'TPM' column in ", path, call. = FALSE)
  }
  tbl <- tbl[c(1, tpm_col)]
  colnames(tbl)[2] <- strsplit(file_name, split = "_", fixed = TRUE)[[1]][1]
  tbl[!duplicated(tbl[, 1]), ]
}

finalData <- read_sample_tpm(files[1], filelist[1])
# Every table is joined on the identifier column name taken from the first
# file; presumably all inputs share that header -- verify if inputs vary.
by_name <- colnames(finalData)[1]

# seq_along(files)[-1] is empty when there is only one input file, whereas
# 2:length(files) would count down (2, 1) and try to read a missing file.
for (i in seq_along(files)[-1]) {
  sample_tbl <- read_sample_tpm(files[i], filelist[i])
  finalData <- dplyr::full_join(finalData, sample_tbl, by = by_name)
}

write.csv(finalData, file = paste0(args[1], "/RSEM.csv"), row.names = FALSE)