-
Notifications
You must be signed in to change notification settings - Fork 0
/
xtile_TCGA.R
132 lines (100 loc) · 4.04 KB
/
xtile_TCGA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
data = read.table("TCGA_BRCA_clin_1142_1080.txt", header = T, check.names = FALSE)
datat <- as.data.frame(t(data))
Data <- as.matrix(data)
x <- model.matrix(status ~., datat)[,-c(1,2)]
x_hat <- data.frame(x)
y <- as.matrix(datat[,c(1,2)])
colnames(y) <- c("time", "status")
y_hat <- data.frame(y)
## coef
coef_gene <- read.csv("univariate_cox_coef.csv", header = T, sep=',')
coef <- as.matrix(coef_gene[,2])
rownames(coef) <- coef_gene[,1]
# my_overlap_coef ------------------------------------------------------
my_overlap <- function(x, y){
coefs.v <- x[,1] %>% { .[. != 0]}
coefs.v %>% {
data.frame(gene.name = names(.),
coefficient = .,
stringsAsFactors = FALSE)
} %>%
arrange(gene.name) %>%
knitr::kable()
sele <- rownames(as.matrix(coefs.v))
gene <- rownames(y)[-c(1,2)]
overlap <- intersect(sele, gene)
lab <- x[,1] %>% { .[. != 0]} %>% as.matrix
coefs.v <- lab[overlap,]
my <- list(coefs.v, overlap)
return(my)
}
# 画图 ----------------------------------------------------------------------
source('D:\\E\\博士\\R_程序\\BRCA\\R\\myoverlap_separate2GroupsCox.R')
library(ggpubr)
library(magrittr)
library(survminer)
coef_test <- my_overlap(coef, Data)
plotp_Train <- separate2GroupsCox(as.vector(coef_test[[1]]), x_hat[, coef_test[[2]]],
plot.title = 'GSE1456_smfs', as.data.frame(y),
legend.outside = T)
plot_train <- plotp_Train$plot
## for Xtile
p_index <- cbind(y,plotp_Train$index)
colnames(p_index) <- c(colnames(y), "riskscore")
# write.table(p_index, file = "TCGA_OS_3gene.txt", quote = F, row.names = F, sep="\t")
# write.table(p_index, file = "TCGA_OS_3gene.csv", quote = F, row.names = F)
# Extract features on TCGA --------------------------------------------------------------
library(survminer)
library(survival)
library(ggplot2)
setwd("D:\\E\\博士\\R_程序\\BRCA\\Data\\TCGA_NEW\\result")
data = read.table("TCGA_OS_3gene.txt", header = T, check.names = FALSE)
gene_name <- colnames(data)[3]
exprSet <- as.data.frame(t(data))
## Set cutoff value
alpha <- 0.50
risk_score <- t(as.matrix(exprSet[gene_name,]))
cut_off <- rep(as.numeric(quantile(exprSet[gene_name,],alpha)), dim(exprSet)[2])
data$time <- data$time/365
data$riskscore <- ifelse(risk_score > cut_off, 'high','low')
table(data$risk_score)
fit <- survfit(Surv(time, status)~riskscore, data = data)
p <- ggsurvplot(fit, data = data,
conf.int = F,
# surv.median.line = "hv",
risk.table = TRUE,
tables.height= 0.25,
cumcensor = T,
legend = c(0.83,0.95),
# P value
pval = TRUE,
pval.size=6,
font.pval= c(14, "bold", "black"),
pval.coord = c(0.00, 0.05),
# legend
legend.title = '', # gene_name
legend.labs=c("High risk", "Low risk"),
font.legend= c(14, "plain", "black"),
# font.main = c(100, "bold", "black"),
# xlim = c(0,72), # present narrower X axis, but not affect
# survival estimates.
palette=c("red", "blue"),
font.x = c(14, "plain", "black"),
font.y = c(14, "plain", "black"),
font.tickslab = c(14, "plain", "black"),
xlab = "Time in years",
break.time.by = 6
) # break X axis in time intervals by 500.
# 添加HR和CI
res_cox <- coxph(Surv(time, status) ~riskscore, data=data)
HR <- round(summary(res_cox)$conf.int[1],2)
ci_l <- round(summary(res_cox)$conf.int[3],2)
ci_r <- round(summary(res_cox)$conf.int[4],2)
p1 <- p
p1$plot = p1$plot +
ggplot2::annotate("text",x = 3.13, y = 0.3, label = paste("HR : ", HR), size = 5) +
ggplot2::annotate("text",x = 6.28, y = 0.2,
label = paste("(","95%CI : ", ci_l,"-",ci_r,")", sep = ""), size = 5)
pdf("TCGA_3_os.pdf", width = 4.8, height = 6, onefile = FALSE)
p1
dev.off()