Skip to content

Commit

Permalink
Added visualization functions
Browse files Browse the repository at this point in the history
  • Loading branch information
almeidasilvaf committed Feb 3, 2024
1 parent e409176 commit 16ac02e
Show file tree
Hide file tree
Showing 25 changed files with 823 additions and 58 deletions.
18 changes: 18 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(classify_gene_pairs)
export(classify_genes)
export(duplicates2counts)
export(find_ks_peaks)
export(get_anchors_list)
export(get_intron_counts)
Expand All @@ -10,7 +11,10 @@ export(get_tandem_proximal)
export(get_transposed)
export(get_transposed_classes)
export(pairs2kaks)
export(plot_duplicate_freqs)
export(plot_ks_distro)
export(plot_ks_peaks)
export(plot_rates_by_species)
export(split_pairs_by_peak)
importFrom(AnnotationDbi,select)
importFrom(BiocParallel,SerialParam)
Expand All @@ -20,15 +24,29 @@ importFrom(GenomicFeatures,intronsByTranscript)
importFrom(GenomicRanges,GRangesList)
importFrom(MSA2dist,dnastring2kaks)
importFrom(ggplot2,aes)
importFrom(ggplot2,after_stat)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,facet_grid)
importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_boxplot)
importFrom(ggplot2,geom_density)
importFrom(ggplot2,geom_histogram)
importFrom(ggplot2,geom_violin)
importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggplot_build)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_fill_manual)
importFrom(ggplot2,scale_x_continuous)
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,stat_function)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_bw)
importFrom(ggplot2,vars)
importFrom(mclust,densityMclust)
importFrom(rlang,.data)
importFrom(stats,density)
importFrom(stats,dnorm)
importFrom(syntenet,interspecies_synteny)
importFrom(syntenet,intraspecies_synteny)
Expand Down
15 changes: 9 additions & 6 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,16 @@
"cds_scerevisiae"


#' Duplicate pairs and Ka, Ks, and Ka/Ks values for S. cerevisiae
#' Duplicate pairs and Ka, Ks, and Ka/Ks values for fungi species
#'
#' This data set was obtained with \code{classify_gene_pairs()} followed
#' by \code{pairs2kaks()}.
#'
#' @name scerevisiae_kaks
#' @format A data frame with the following variables:
#' @name fungi_kaks
#' @format A list of data frames with elements
#' named \strong{saccharomyces_cerevisiae}, \strong{candida_glabrata},
#' and \strong{schizosaccharomyces_pombe}. Each data frame contains
#' the following variables:
#' \describe{
#' \item{dup1}{Character, duplicated gene 1.}
#' \item{dup2}{Character, duplicated gene 2.}
Expand All @@ -83,9 +86,9 @@
#' \item{type}{Character, mode of duplication}
#' }
#' @examples
#' data(scerevisiae_kaks)
#' @usage data(scerevisiae_kaks)
"scerevisiae_kaks"
#' data(fungi_kaks)
#' @usage data(fungi_kaks)
"fungi_kaks"


#' Duplicate pairs and Ks values for Glycine max
Expand Down
3 changes: 2 additions & 1 deletion R/duplicate_classification.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ classify_gene_pairs <- function(
#' @export
#' @importFrom GenomicRanges GRangesList
#' @examples
#' data(scerevisiae_kaks)
#' data(fungi_kaks)
#' scerevisiae_kaks <- fungi_kaks$saccharomyces_cerevisiae
#'
#' cols <- c("dup1", "dup2", "type")
#' gene_pairs_list <- list(Scerevisiae = scerevisiae_kaks[, cols])
Expand Down
6 changes: 4 additions & 2 deletions R/ka_ks_analyses.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ pairs2kaks <- function(
#' @export
#' @rdname find_ks_peaks
#' @examples
#' data(scerevisiae_kaks)
#' data(fungi_kaks)
#' scerevisiae_kaks <- fungi_kaks$saccharomyces_cerevisiae
#' ks <- scerevisiae_kaks$Ks
#'
#' # Find 2 peaks in Ks distribution
Expand Down Expand Up @@ -205,7 +206,8 @@ find_ks_peaks <- function(ks, npeaks = 2, min_ks = 0.01, max_ks = 4,
#' @export
#' @rdname split_pairs_by_peak
#' @examples
#' data(scerevisiae_kaks)
#' data(fungi_kaks)
#' scerevisiae_kaks <- fungi_kaks$saccharomyces_cerevisiae
#'
#' # Create a data frame of duplicate pairs and Ks values
#' ks_df <- scerevisiae_kaks[, c("dup1", "dup2", "Ks")]
Expand Down
63 changes: 62 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ get_intron_counts <- function(txdb) {
#' @noRd
#' @rdname find_intersect_mixtures
#' @examples
#' data(scerevisiae_kaks)
#' data(fungi_kaks)
#' scerevisiae_kaks <- fungi_kaks$saccharomyces_cerevisiae
#' ks <- scerevisiae_kaks$Ks
#'
#' # Find 2 peaks in Ks distribution
Expand Down Expand Up @@ -239,4 +240,64 @@ find_intersect_mixtures <- function(peaks) {
}


#' Get a duplicate count matrix for each genome
#'
#' Counts how many duplicated genes (or gene pairs) fall into each
#' duplication mode for every species in a list. All species are tabulated
#' against the same set of duplication modes, so modes that are absent from
#' a species are reported with a count of 0.
#'
#' @param duplicate_list A named list of data frames with the duplicated
#' genes or gene pairs and their modes of duplication as returned
#' by \code{classify_gene_pairs()} or \code{classify_genes()}. Each data
#' frame must contain a column named \strong{type}, and list names are used
#' as species names.
#' @param shape Character specifying the shape of the output data frame.
#' One of "long" (data frame in the long shape, in the tidyverse sense),
#' or "wide" (data frame in the wide shape, in the tidyverse sense).
#' Default: "long".
#'
#' @return If \strong{shape = "wide"}, a data frame containing the
#' frequency of duplicated genes (or gene pairs) by mode for each species,
#' with species in rows (identified by the first column, \strong{species})
#' and duplication modes in the remaining columns.
#' If \strong{shape = "long"}, a data frame in long format with the following
#' variables:
#' \describe{
#'   \item{type}{Factor, type of duplication.}
#'   \item{n}{Numeric, number of duplicates.}
#'   \item{species}{Character, species name.}
#' }
#'
#' @export
#' @rdname duplicates2counts
#' @examples
#' data(fungi_kaks)
#'
#' # Get unique duplicates
#' duplicate_list <- classify_genes(fungi_kaks)
#'
#' # Get count table
#' counts <- duplicates2counts(duplicate_list)
duplicates2counts <- function(duplicate_list, shape = "long") {

    # Validate arguments once, up front, rather than once per species
    valid_shape <- is.character(shape) && length(shape) == 1 &&
        shape %in% c("long", "wide")
    if (!valid_shape) {
        stop("Argument 'shape' must be one of 'long' or 'wide'.")
    }
    if (length(duplicate_list) == 0) {
        stop("Argument 'duplicate_list' must contain at least one element.")
    }

    # Reference levels for `type`: the largest set of factor levels found in
    # any element (the first element wins on ties). Selecting by position
    # avoids depending on the list names for this lookup.
    tlevels <- lapply(duplicate_list, function(x) levels(x$type))
    tlevels <- tlevels[[which.max(lengths(tlevels))]]

    # `levels()` is NULL for non-factor columns (e.g., character); in that
    # case, fall back to the duplication modes observed across all elements
    if (is.null(tlevels)) {
        observed <- lapply(duplicate_list, function(x) as.character(x$type))
        tlevels <- sort(unique(unlist(observed)))
    }

    # Tabulate each species against the shared reference levels
    counts <- lapply(seq_along(duplicate_list), function(i) {

        species <- names(duplicate_list)[i]

        dup_table <- duplicate_list[[i]]
        dup_table$type <- factor(dup_table$type, levels = tlevels)
        freqs <- table(dup_table$type)

        if (shape == "long") {
            final_dups <- as.data.frame(freqs)
            names(final_dups) <- c("type", "n")
            final_dups$species <- species
        } else {
            # One row per species, one column per duplication mode
            final_dups <- as.data.frame(t(as.matrix(freqs)))
            final_dups <- cbind(species, final_dups)
        }

        final_dups
    })

    # Bind all per-species tables in a single call
    do.call(rbind, counts)
}

9 changes: 6 additions & 3 deletions R/utils_duplicate_classification.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ get_segmental <- function(anchor_pairs = NULL, pairs = NULL) {
#' @examples
#' data(yeast_annot)
#' data(yeast_seq)
#' data(scerevisiae_kaks)
#' data(fungi_kaks)
#' scerevisiae_kaks <- fungi_kaks$saccharomyces_cerevisiae
#'
#' # Get processed annotation for S. cerevisiae
#' pdata <- annotation <- syntenet::process_input(yeast_seq, yeast_annot)
Expand Down Expand Up @@ -191,7 +192,8 @@ get_tandem_proximal <- function(
#' data(diamond_intra)
#' data(yeast_seq)
#' data(yeast_annot)
#' data(scerevisiae_kaks)
#' data(fungi_kaks)
#' scerevisiae_kaks <- fungi_kaks$saccharomyces_cerevisiae
#'
#' # Get processed annotation
#' pdata <- syntenet::process_input(yeast_seq, yeast_annot)
Expand Down Expand Up @@ -307,7 +309,8 @@ get_transposed <- function(
#' data(diamond_intra)
#' data(yeast_seq)
#' data(yeast_annot)
#' data(scerevisiae_kaks)
#' data(fungi_kaks)
#' scerevisiae_kaks <- fungi_kaks$saccharomyces_cerevisiae
#'
#' # Get processed annotation
#' pdata <- syntenet::process_input(yeast_seq, yeast_annot)
Expand Down
Loading

0 comments on commit 16ac02e

Please sign in to comment.