Skip to content

Commit

Permalink
Update gene_set_enrichment_plot() to use ComplexHeatmap & annotations…
Browse files Browse the repository at this point in the history
…, improve docs
  • Loading branch information
lahuuki committed Dec 13, 2024
1 parent d000478 commit 606924e
Showing 1 changed file with 241 additions and 89 deletions.
330 changes: 241 additions & 89 deletions R/gene_set_enrichment_plot.R
Original file line number Diff line number Diff line change
@@ -1,30 +1,47 @@
#' Plot the gene set enrichment results
#' Plot the gene set enrichment results with ComplexHeatmap
#'
#' This function takes the output of [gene_set_enrichment()] and creates a
#' heatmap visualization of the results.
#' ComplexHeatmap visualization of the results. Fill of the heatmap represents
#' the -log10(p-val), Odds-ratios are printed for test that pass specified
#' significance threshold `ORcut`.
#'
#' Includes functionality to plot the size of the input gene sets as barplot
#' annotations.
#'
#' @param enrichment The output of [gene_set_enrichment()].
#' @param xlabs A vector of names in the same order and length as
#' `unique(enrichment$ID)`. Gets passed to [layer_matrix_plot()].
#' `unique(enrichment$ID)`.
#' @param PThresh A `numeric(1)` specifying the P-value threshold for the
#' maximum value in the `-log10(p)` scale.
#' @param ORcut A `numeric(1)` specifying the P-value threshold for the
#' minimum value in the `-log10(p)` scale for printing the odds ratio values
#' in the cells of the resulting plot.
#' in the cells of the resulting plot. Defaults to 3 or p-val < 0.001.
#' @param enrichOnly A `logical(1)` indicating whether to show only odds ratio
#' values greater than 1.
#' @param layerHeights A `numeric()` vector of length equal to
#' `length(unique(enrichment$test)) + 1` that starts at 0 specifying where
#' to plot the y-axis breaks which can be used for re-creating the length of
#' each brain layer. Gets passed to [layer_matrix_plot()].
#' @param mypal A vector with the color palette to use. Gets passed to
#' [layer_matrix_plot()].
#' @param cex Passed to [layer_matrix_plot()].
#' @param mypal A `character` vector with the color palette to use. Colors will
#' be in order from 0 to lowest P-val `max(-log(enrichment$Pval))`. Defaults to
#' white, yellow, red pallet.
#' @param plot_SetSize_bar A `logical(1)` indicating whether to plot SetSize
#' from `enrichment` as an `anno_barplot` at the top of the heatmap.
#' @param gene_list_length Optional named `numeric` vector indicating the length
#' of the `gene_list` used to calculate `enrichment`, if inclided and
#' `plot_setSize_bar = TRUE` then the top `anno_barplot` will show the `SetSize`
#' and the difference from the length of the input gene_list.
#' #' @param model_sig_length Optional named `numeric` vector indicating the
#' number of significant genes in `modeling_results` used to calculate
#' `enrichment`. If included `anno_barplot` will be added to rows.
#' #' @param model_colors named `character` vector of colors, Adds colors to
#' row annotations.
#' #' @param ... Additional parameters passed to
#' [ComplexHeatmap::Heatmap()][ComplexHeatmap::Heatmap()].
#'
#' @return A plot visualizing the gene set enrichment
#' odds ratio and p-value results.
#' @return A ([Heatmap-class][ComplexHeatmap::Heatmap-class]) visualizing the
#' gene set enrichment odds ratio and p-value results.
#' @export
#' @importFrom stats reshape
#' @importFrom circlize colorRamp2
#' @importFrom ComplexHeatmap columnAnnotation rowAnnotation Heatmap anno_barplot
#'
#' @family Gene set enrichment functions
#' @author Andrew E Jaffe, Leonardo Collado-Torres
#' @seealso layer_matrix_plot
Expand All @@ -45,7 +62,7 @@
#' )
#'
#' ## Format them appropriately
#' asd_sfari_geneList <- list(
#' asd_safari_geneList <- list(
#' Gene_SFARI_all = asd_sfari$ensembl.id,
#' Gene_SFARI_high = asd_sfari$ensembl.id[asd_sfari$gene.score < 3],
#' Gene_SFARI_syndromic = asd_sfari$ensembl.id[asd_sfari$syndromic == 1]
Expand All @@ -58,98 +75,233 @@
#'
#' ## Compute the gene set enrichment results
#' asd_sfari_enrichment <- gene_set_enrichment(
#' gene_list = asd_sfari_geneList,
#' gene_list = asd_safari_geneList,
#' modeling_results = modeling_results,
#' model_type = "enrichment"
#' )
#'
#' ## Visualize the gene set enrichment results
#' ## with a custom color palette
#'
#' ## Default plot
#' gene_set_enrichment_plot(
#' enrichment = asd_sfari_enrichment
#' )
#'
#' ## Use a custom green color palette & use shorter gene set names (x-axis labels)
#' gene_set_enrichment_plot(
#' asd_sfari_enrichment,
#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)),
#' mypal = c("white",RColorBrewer::brewer.pal(9, "BuGn"))
#' )
#'
#' ## Add bar plot annotations for SetSize of model genes in the gene_lists
#' gene_set_enrichment_plot(
#' asd_sfari_enrichment,
#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)),
#' plot_SetSize_bar = TRUE
#' )
#'
#' ## Add stacked bar plot annotations showing SetSize and difference from the
#' ## length of the input gene_list
#' gene_set_enrichment_plot(
#' asd_sfari_enrichment,
#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)),
#' mypal = c(
#' "white",
#' grDevices::colorRampPalette(
#' RColorBrewer::brewer.pal(9, "BuGn")
#' )(50)
#' )
#' plot_SetSize_bar = TRUE,
#' gene_list_length = lapply(asd_safari_geneList, length)
#' )
#'
#' ## add bar plot annotations for number of enriched genes from layers
#' if (!exists("sce_layer")) sce_layer <- fetch_data(type = "sce_layer")
#' sig_genes <- sig_genes_extract(
#' modeling_results = modeling_results,
#' model = "enrichment",
#' sce_layer = sce_layer,
#' n = nrow(sce_layer)
#' )
#'
#' sig_genes <- sig_genes[sig_genes$fdr < 0.1,]
#' n_sig_model <- as.list(table(sig_genes$test))
#'
#' ## add barplot with n significant genes from modeling
#' gene_set_enrichment_plot(
#' asd_sfari_enrichment,
#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)),
#' plot_SetSize_bar = TRUE,
#' model_sig_length = n_sig_model
#' )
#'
#' ## Specify the layer heights so it resembles more the length of each
#' ## layer in the brain
#'## add color annotaions
#' gene_set_enrichment_plot(
#' asd_sfari_enrichment,
#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)),
#' layerHeights = c(0, 40, 55, 75, 85, 110, 120, 135),
#' plot_SetSize_bar = TRUE,
#' model_colors = libd_layer_colors
#' )
#'
#' ## add barplot with n significant genes from modeling filled with model color
#' gene_set_enrichment_plot(
#' asd_sfari_enrichment,
#' xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)),
#' plot_SetSize_bar = TRUE,
#' model_sig_length = n_sig_model,
#' model_colors = libd_layer_colors
#' )
#'
gene_set_enrichment_plot <-
function(enrichment,
function(
enrichment,
xlabs = unique(enrichment$ID),
PThresh = 12,
ORcut = 3,
enrichOnly = FALSE,
layerHeights = c(0, seq_len(length(unique(enrichment$test)))) * 15,
mypal = c(
"white",
grDevices::colorRampPalette(RColorBrewer::brewer.pal(9, "YlOrRd"))(50)
),
cex = 1.2) {
## Re-order and shorten names if they match our data
if (all(unique(enrichment$test) %in% c("WM", paste0("Layer", seq_len(6))))) {
enrichment$test <-
factor(gsub("ayer", "", enrichment$test), levels = rev(c(paste0(
"L", seq_len(6)
), "WM")))
}

## Check inputs
stopifnot(is(enrichment, "data.frame"))
stopifnot(all(c("ID", "test", "OR", "Pval") %in% colnames(enrichment)))
stopifnot(length(layerHeights) == length(unique(enrichment$test)) + 1)
stopifnot(ORcut <= PThresh)
stopifnot(length(xlabs) == length(unique(enrichment$ID)))

## Convert to -log10 scale and threshold the pvalues
enrichment$log10_P_thresh <-
round(-log10(enrichment$Pval), 2)
enrichment$log10_P_thresh[which(enrichment$log10_P_thresh > PThresh)] <-
PThresh

## Change some values for the plot
if (enrichOnly) {
enrichment$log10_P_thresh[enrichment$OR < 1] <- 0
}
enrichment$OR_char <- as.character(round(enrichment$OR, 2))
enrichment$OR_char[enrichment$log10_P_thresh < ORcut] <- ""

## Make into wide matrices
make_wide <- function(var = "OR_char") {
res <-
reshape(
enrichment,
idvar = "ID",
timevar = "test",
direction = "wide",
drop = colnames(enrichment)[!colnames(enrichment) %in% c("ID", "test", var)],
sep = "_mypattern_"
)[, -1, drop = FALSE]
colnames(res) <-
gsub(".*_mypattern_", "", colnames(res))
rownames(res) <- unique(enrichment$ID)
res <- res[, levels(as.factor(enrichment$test))]
t(res)
}
wide_or <- make_wide("OR_char")
wide_p <- make_wide("log10_P_thresh")

layer_matrix_plot(
matrix_values = wide_p,
matrix_labels = wide_or,
xlabs = xlabs,
layerHeights = layerHeights,
mypal = mypal,
cex = cex,
mar = c(12, 4 + (max(nchar(rownames(wide_p))) %/% 3) * 0.5, 4, 2) + 0.1
)
mypal = c("white", RColorBrewer::brewer.pal(9, "YlOrRd")),
plot_SetSize_bar = FALSE,
gene_list_length = NULL,
model_sig_length = NULL,
model_colors = NULL,
...
){
## Re-order and shorten names if they match our data
if (all(unique(enrichment$test) %in% c("WM", paste0("Layer", seq_len(6))))) {
enrichment$test <-
factor(gsub("ayer", "", enrichment$test), levels = rev(c(paste0(
"L", seq_len(6)
), "WM")))
}

## Check inputs
stopifnot(is(enrichment, "data.frame"))
stopifnot(all(c("ID", "test", "OR", "Pval") %in% colnames(enrichment)))
stopifnot(ORcut <= PThresh)
stopifnot(length(xlabs) == length(unique(enrichment$ID)))

## Convert to -log10 scale and threshold the pvalues
enrichment$log10_P_thresh <-
round(-log10(enrichment$Pval), 2)
enrichment$log10_P_thresh[which(enrichment$log10_P_thresh > PThresh)] <-
PThresh

## Change some values for the plot
if (enrichOnly) {
enrichment$log10_P_thresh[enrichment$OR < 1] <- 0
}
enrichment$OR_char <- as.character(round(enrichment$OR, 2))
enrichment$OR_char[enrichment$log10_P_thresh < ORcut] <- ""

## sub xlabs labels
if(!is.null(gene_list_length)){
stopifnot(setequal(names(gene_list_length), unique(enrichment$ID)))
gene_list_length <- gene_list_length[unique(enrichment$ID)]
names(gene_list_length) <- xlabs
}

for(i in seq(length(xlabs))){
enrichment$ID <- gsub(unique(enrichment$ID)[[i]], xlabs[[i]], enrichment$ID)
}

## Make into wide matrices
make_wide <- function(var = "OR_char") {
res <-
reshape(
enrichment,
idvar = "ID",
timevar = "test",
direction = "wide",
drop = colnames(enrichment)[!colnames(enrichment) %in% c("ID", "test", var)],
sep = "_mypattern_"
)[, -1, drop = FALSE]
colnames(res) <-
gsub(".*_mypattern_", "", colnames(res))
rownames(res) <- unique(enrichment$ID)
res <- res[, levels(as.factor(enrichment$test))]
t(res)
}
wide_or <- make_wide("OR_char")
wide_p <- make_wide("log10_P_thresh")

## define color pallet
mypal = circlize::colorRamp2(breaks = seq(0, max(wide_p), length.out = length(mypal)),
colors = mypal)

## Add gene count annotations
enrichment_setsize <- unique(enrichment[,c("ID", "SetSize")])

## COL annotations
if(plot_SetSize_bar){

if(!is.null(gene_list_length)){
stopifnot(all(colnames(wide_p) %in% names(gene_list_length)))
enrichment_setsize$SetInput <- unlist(gene_list_length[enrichment_setsize$ID])
enrichment_setsize$Diff <- enrichment_setsize$SetInput - enrichment_setsize$SetSize
}

rownames(enrichment_setsize) <- enrichment_setsize$ID
enrichment_setsize$ID <- NULL
enrichment_setsize$SetInput <- NULL ## only plot SetSize + Diff

col_gene_anno <- ComplexHeatmap::columnAnnotation(
`SetSize` = ComplexHeatmap::anno_barplot(enrichment_setsize)
)

} else col_gene_anno <- NULL

if(!is.null(model_colors)){
## shorten names if they match HumanPilot data
if (all(c("WM", paste0("Layer", seq_len(6))) %in% names(model_colors))) {
names(model_colors) <-gsub("ayer", "", names(model_colors))
}}


## ROW annotations
if(!is.null(model_sig_length)){ ## add row barplot annotation

## shorten names if they match HumanPilot data
if (all(names(model_sig_length) %in% c("WM", paste0("Layer", seq_len(6))))) {
names(model_sig_length) <-gsub("ayer", "", names(model_sig_length))
}

stopifnot(all(rownames(wide_p) %in% names(model_sig_length)))
model_sig_length <- t(data.frame(model_sig_length))

if(!is.null(model_colors)){ ## barplot with colors
row_gene_anno <- ComplexHeatmap::rowAnnotation(
`n\nmodel sig` = ComplexHeatmap::anno_barplot(model_sig_length[rownames(wide_p), ],
gp = gpar(fill = model_colors[rownames(wide_p)])
)
# annotation_label = anno_title_row

)
} else { ## barplot no colors
row_gene_anno <- ComplexHeatmap::rowAnnotation(
`model sig` = ComplexHeatmap::anno_barplot(model_sig_length[rownames(wide_p), ])
# annotation_label = anno_title_row
)
}

} else if(!is.null(model_colors)){ ## only apply color annotation

stopifnot(all(rownames(wide_p) %in% names(model_colors)))
model_colors <- model_colors[rownames(wide_p)]

row_gene_anno <- ComplexHeatmap::rowAnnotation(
" " = rownames(wide_p),
col = list(" " = model_colors),
show_legend = FALSE
)

}else row_gene_anno <- NULL

ComplexHeatmap::Heatmap(wide_p,
col = mypal,
name = "-log10(p-val)",
rect_gp = grid::gpar(col = "black", lwd = 1),
cluster_rows = FALSE,
cluster_columns = FALSE,
right_annotation = row_gene_anno,
top_annotation = col_gene_anno,
cell_fun = function(j, i, x, y, width, height, fill) {
grid::grid.text(wide_or[i, j], x, y, gp = grid::gpar(fontsize = 10))
},
...
)
}

0 comments on commit 606924e

Please sign in to comment.