-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpart4.R
204 lines (166 loc) · 8.24 KB
/
part4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
## Dataviz2.0 - part4
# Install only the packages that are not yet available, so that re-running the
# script does not reinstall (and re-download) every dependency each time.
required_pkgs <- c("ggsci", "tidyverse", "ggpubr", "ggrepel", "cowplot",
                   "gridExtra")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(ggsci)
library(tidyverse)
library(ggpubr)
library(ggrepel)
library(cowplot)
library(gridExtra)
#Create the data
#We'll use the plots you've already created in part3: box1, ml_scatter, scatter2
## Data Import
# Read the gene-location table; the first line of the file supplies the column
# names. Later code in this script uses the Chr, Start, End and Length columns.
gene_loc <- read.table("GSE69360.gene-locations.txt",
                       header = TRUE) # TRUE rather than T: `T` is an ordinary, reassignable variable
## Plotting the data
# Scatter plot of (End - Start) against Length for every row of gene_loc,
# grouped and colored by chromosome.
scatter <- ggplot(
  data = gene_loc,
  mapping = aes(x = End - Start, y = Length, group = Chr, color = Chr)
) +
  geom_point()
scatter
### The full data set is hard to read in a single plot.
### Pretend we only care about a small set of chromosomes: subset the data
### and add log10-transformed versions of the two plotted variables.
target <- c("chrX", "chrY", "chrM", "chr17")
gene_loc2 <- gene_loc %>%
  filter(Chr %in% target)
# The transformed values are kept both as standalone vectors and as columns.
log_EndStart <- with(gene_loc2, log10(End - Start))
log_length <- with(gene_loc2, log10(Length))
gene_loc2[["log_length"]] <- log_length
gene_loc2[["log_EndStart"]] <- log_EndStart
head(gene_loc2)
###################################################
## Combine the boxplot and the regression plot created previously
### Boxplot of Length per chromosome, JCO color palette, legend hidden
box1 <- ggplot(gene_loc2, aes(x = Chr, y = Length, group = Chr, color = Chr)) +
  geom_boxplot() +
  labs(x = "Chr") +
  theme_bw() +
  scale_color_jco() +
  theme(legend.position = "none") # remove legend
# box1 + scale_fill_discrete(name = "Chromosome") # rename legend
## Scatter plot with a regression line, one panel per chromosome
ml_scatter <- ggscatter(
  gene_loc2,
  x = "log_EndStart",
  y = "log_length",
  color = "Chr",
  palette = "jco",
  add = "reg.line",
  add.params = list(color = "black"), # customize regression line
  fullrange = TRUE
)
# Facet by chromosome and label each panel with the correlation and the
# regression equation at fixed y positions.
ml_scatter <- ml_scatter +
  facet_wrap(vars(Chr)) +
  stat_cor(label.y = 4.4) +
  stat_regline_equation(label.y = 4.2)
ml_scatter
# Re-facet as a single row of panels (1x4)
ml_scatter <- ml_scatter + facet_grid(cols = vars(Chr))
# Arranging the boxplot and the scatter plot on one page
## We use ggarrange() [in ggpubr], which is a wrapper around plot_grid()
## [in the cowplot package]. Compared to the standard plot_grid(),
## ggarrange() can arrange multiple ggplots over multiple pages.
fig <- ggarrange(
  ml_scatter,
  box1 + rremove("x.text"),
  labels = c("A", "B"),
  ncol = 1,
  nrow = 2,
  common.legend = TRUE
)
fig
# Annotate the arranged figure
annotate_figure(
  fig,
  # grob -- graphical object
  top = text_grob("Visualizing gene length",
                  color = "red", face = "bold", size = 14),
  # customized text; hjust is the horizontal justification:
  # 0 = left align, 1 = right align
  bottom = text_grob("Data source: \n gse69360 data set",
                     color = "blue", hjust = 1, x = 1,
                     face = "italic", size = 10),
  # rot = angle to rotate the text
  left = text_grob("Figure arranged using ggpubr", color = "green", rot = 90),
  right = "I'm done, thanks :-)!",
  fig.lab = "Figure 1",
  fig.lab.face = "bold"
)
# Adding descriptive text
# Text to be added below the two plots
text <- paste("gse69360 data set is a resource of ribosomal",
              "RNA-depleted RNA-Seq data from different normal",
              "adults and fetal human tissues. The dataset was first",
              "published in https://www.nature.com/articles/sdata201563", sep = " ")
# Render the text as a plot-like paragraph so it can be arranged with ggplots
text.p <- ggparagraph(text = text, face = "italic", size = 11, color = "black")
# Stack scatter plot, boxplot and paragraph in one column
final_plt <- ggarrange(ml_scatter, box1, text.p + rremove("x.text"),
                       labels = c("A", "B"),
                       ncol = 1, nrow = 3,
                       common.legend = TRUE)
final_plt
# Export plot
# The file name must be a character string; the bare symbol used before made
# R look for an object named `mean_lth_viz.pdf` and fail. The plot is passed
# explicitly so the export does not depend on which plot was displayed last.
ggsave("mean_lth_viz.pdf", plot = final_plt,
       width = 5, height = 5, units = "in")
# Place the scatter plot together with marginal density plots
# Scatter plot colored by group ("Chr"), with a border around the panel
sp <- ggscatter(
  gene_loc2,
  x = "log_EndStart", y = "log_length",
  color = "Chr", palette = "jco",
  size = 1, alpha = 0.2
) +
  border()
# Marginal density of x (top panel), with the theme cleaned
xplot <- ggdensity(gene_loc2, x = "log_EndStart",
                   fill = "Chr", palette = "jco") +
  clean_theme()
# Marginal density of y (right panel), rotated, with the theme cleaned
yplot <- ggdensity(gene_loc2, x = "log_length",
                   fill = "Chr", palette = "jco") +
  rotate() +
  clean_theme()
# Arrange on a 2x2 grid; the NULL entry leaves the top-right cell empty
fig_2 <- ggarrange(
  xplot, NULL,
  sp, yplot,
  ncol = 2, nrow = 2,
  align = "hv",
  widths = c(2, 1),
  heights = c(1, 2),
  common.legend = TRUE
)
# Annotating the plot
scat_density_plt <- annotate_figure(fig_2,
                top = text_grob("Scatter plot with marginal density", color = "red", face = "bold", size = 14), # grob -- graphical object
                bottom = text_grob("Data source: \n gse69360 data set", color = "blue", # customize text
                                   hjust = 1, x = 1, face = "italic", size = 10), # horizontal justification: 0 = left align, 1 = right align
                left = text_grob("Figure arranged using ggpubr", color = "green", rot = 90), # rot = angle to rotate the text
                right = "I'm done, thanks :-)!",
                fig.lab = "Figure 1", fig.lab.face = "bold"
)
scat_density_plt
# Export plot
# The file name must be a quoted string; the bare symbol used before made R
# look for an object named `marginal_distribution.pdf` and fail. The plot is
# passed explicitly so the export does not depend on the last displayed plot.
ggsave("marginal_distribution.pdf", plot = scat_density_plt,
       width = 5, height = 5, units = "in")
################################
# Insert a table into a plot
# Regression plot of summary data with labels & confidence interval
# (made in part 2 using the code below).
# Per-chromosome summary: mean of Length and number of rows
a <- gene_loc %>%
  group_by(Chr) %>%
  summarise(meanLength = mean(Length), numGenes = n())
head(a)
# Points labelled by chromosome, with a loess smoother drawn on top
scatter2 <- ggplot(data = a, mapping = aes(x = numGenes, y = meanLength)) +
  geom_point() +
  geom_text_repel(aes(label = Chr), color = "red", segment.color = "blue") +
  geom_smooth(method = loess, color = "lightblue", alpha = 0.1) +
  theme_bw()
scatter2
# Compute descriptive statistics by group
stable <- desc_statby(
  gene_loc2,
  measure.var = "Length", # column containing the variable to be summarized
  grps = "Chr"            # grouping column
)
# Keep only the columns shown in the table
stable <- stable[c("Chr", "length", "mean", "sd")]
# Summary table drawn as a plot, medium blue theme
stable.p <- ggtexttable(stable, rows = NULL, theme = ttheme("mBlue"))
# Stack the scatter plot, the table and the descriptive paragraph
fig_3 <- ggarrange(
  scatter2,
  stable.p,
  text.p + rremove("x.text"),
  ncol = 1,
  nrow = 3,
  heights = c(1, 0.5, 0.2),
  common.legend = TRUE
)
fig_3
# Place the table within the plot using annotation_custom() from ggplot2
# The table plot is converted to a grob and positioned in data coordinates.
fig_4 <- scatter2 + annotation_custom(ggplotGrob(stable.p),
                                      xmin = 2000, ymax = 1500,
                                      xmax = 5000)
# Annotating the plot
final_scatter2 <- annotate_figure(fig_4,
                                    top = text_grob("Scatter plot with summary table", color = "red", face = "bold", size = 14), # grob -- graphical object
                                    bottom = text_grob("Data source: \n gse69360 data set", color = "blue", # customize text
                                                       hjust = 1, x = 1, face = "italic", size = 10), # horizontal justification: 0 = left align, 1 = right align
                                    left = text_grob("Figure arranged using ggpubr", color = "green", rot = 90), # rot = angle to rotate the text
                                    right = "I'm done, thanks :-)!",
                                    fig.lab = "Figure 1", fig.lab.face = "bold"
)
final_scatter2
# Export plot
# The file name must be a quoted string; the bare symbol used before made R
# look for an object named `mean_length.pdf` and fail. The plot is passed
# explicitly so the export does not depend on the last displayed plot.
ggsave("mean_length.pdf", plot = final_scatter2,
       width = 5, height = 5, units = "in")
##References
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/78-perfect-scatter-plots-with-correlation-and-marginal-histograms/
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/#change-columnrow-span-of-a-plot
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/77-facilitating-exploratory-data-visualization-application-to-tcga-genomic-data/
# http://www.sthda.com/english/wiki/be-awesome-in-ggplot2-a-practical-guide-to-be-highly-effective-r-software-and-data-visualization#blog-posts