diff --git a/DESCRIPTION b/DESCRIPTION index 013b6e0..fd0fce0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,6 +67,7 @@ Suggests: testthat (>= 3.0.0), knitr, feature, + patchwork, BiocStyle, rmarkdown, covr, diff --git a/R/ka_ks_analyses.R b/R/ka_ks_analyses.R index b882058..f6b53eb 100644 --- a/R/ka_ks_analyses.R +++ b/R/ka_ks_analyses.R @@ -33,9 +33,10 @@ #' annot <- pdata$annotation["Scerevisiae"] #' #' # Binary classification scheme -#' gene_pairs_list <- classify_gene_pairs(annot, blast_list) +#' pairs <- classify_gene_pairs(annot, blast_list) +#' td_pairs <- pairs[[1]][pairs[[1]]$type == "TD", ] #' gene_pairs_list <- list( -#' Scerevisiae = gene_pairs_list[[1]][seq(1, 5, by = 1), ] +#' Scerevisiae = td_pairs[seq(1, 3, by = 1), ] #' ) #' #' cds <- list(Scerevisiae = cds_scerevisiae) diff --git a/data/cds_scerevisiae.rda b/data/cds_scerevisiae.rda index 5206a71..bf0b5c9 100644 Binary files a/data/cds_scerevisiae.rda and b/data/cds_scerevisiae.rda differ diff --git a/data/fungi_kaks.rda b/data/fungi_kaks.rda index c0648f4..be110e5 100644 Binary files a/data/fungi_kaks.rda and b/data/fungi_kaks.rda differ diff --git a/data/gmax_ks.rda b/data/gmax_ks.rda index 1d2083d..42a56f2 100644 Binary files a/data/gmax_ks.rda and b/data/gmax_ks.rda differ diff --git a/man/pairs2kaks.Rd b/man/pairs2kaks.Rd index 99968fc..0e03b02 100644 --- a/man/pairs2kaks.Rd +++ b/man/pairs2kaks.Rd @@ -46,9 +46,10 @@ pdata <- syntenet::process_input(yeast_seq, yeast_annot) annot <- pdata$annotation["Scerevisiae"] # Binary classification scheme -gene_pairs_list <- classify_gene_pairs(annot, blast_list) +pairs <- classify_gene_pairs(annot, blast_list) +td_pairs <- pairs[[1]][pairs[[1]]$type == "TD", ] gene_pairs_list <- list( - Scerevisiae = gene_pairs_list[[1]][seq(1, 5, by = 1), ] + Scerevisiae = td_pairs[seq(1, 3, by = 1), ] ) cds <- list(Scerevisiae = cds_scerevisiae) diff --git a/tests/testthat/test-ka_ks_analyses.R b/tests/testthat/test-ka_ks_analyses.R index 2a398f7..10a2ea6 100644 --- a/tests/testthat/test-ka_ks_analyses.R +++ b/tests/testthat/test-ka_ks_analyses.R @@ -14,8 +14,11 @@ scerevisiae_kaks <- fungi_kaks$saccharomyces_cerevisiae pdata <- syntenet::process_input(yeast_seq, yeast_annot) annot <- pdata$annotation["Scerevisiae"] -gene_pairs_list <- classify_gene_pairs(annot, blast_list) -gene_pairs_list <- list(Scerevisiae = gene_pairs_list[[1]][1:2, ]) +pairs <- classify_gene_pairs(annot, blast_list) +td_pairs <- pairs[[1]][pairs[[1]]$type == "TD", ] +gene_pairs_list <- list( + Scerevisiae = td_pairs[seq(1, 3, by = 1), ] +) cds <- list(Scerevisiae = cds_scerevisiae) @@ -23,8 +26,8 @@ ks <- scerevisiae_kaks$Ks ## Simulate gene with CDS that is not a multiple of 3 cds2 <- cds -cds2$Scerevisiae$YGR032W <- Biostrings::subseq( - cds2$Scerevisiae$YGR032W, 1, length(cds2$Scerevisiae$YGR032W)-1 +cds2$Scerevisiae$Q0055 <- Biostrings::subseq( + cds2$Scerevisiae$Q0055, 1, length(cds2$Scerevisiae$Q0055) - 1 ) #----Start tests---------------------------------------------------------------- @@ -35,7 +38,7 @@ test_that("pairs2kaks() returns a data frame with Ka, Ks, and Ka/Ks", { expect_equal(class(kaks), "list") expect_equal(class(kaks[[1]]), "data.frame") - expect_equal(nrow(kaks[[1]]), 2) + expect_equal(nrow(kaks[[1]]), nrow(gene_pairs_list[[1]])) expect_true("Ks" %in% names(kaks[[1]])) expect_true("Ka" %in% names(kaks[[1]])) expect_true("Ka_Ks" %in% names(kaks[[1]])) diff --git a/vignettes/doubletrouble_vignette.Rmd b/vignettes/doubletrouble_vignette.Rmd index ed0b645..bcc642c 100644 --- a/vignettes/doubletrouble_vignette.Rmd +++ b/vignettes/doubletrouble_vignette.Rmd @@ -378,7 +378,7 @@ head(intron_counts$Scerevisiae) ``` Finally, we can use this list to classify duplicates using the full scheme -as folows: +as follows: ```{r} # Full scheme @@ -446,7 +446,7 @@ implements all codon models in KaKs_Calculator 2.0 [@wang2010kaks_calculator]. For the purpose of demonstration, we will only calculate $K_a$, $K_s$, -and $K_a/K_s$ for 5 SD-derived gene pairs. The CDS for SD-derived +and $K_a/K_s$ for 5 TD-derived gene pairs. The CDS for TD-derived genes were obtained from Ensembl Fungi [@yates2022ensembl], and they are stored in `cds_scerevisiae`. @@ -457,8 +457,9 @@ head(cds_scerevisiae) # Store DNAStringSet object in a list cds_list <- list(Scerevisiae = cds_scerevisiae) -# Keep only top give gene pairs for demonstration purposes -gene_pairs <- list(Scerevisiae = c_full$Scerevisiae[seq(1, 5, by = 1), ]) +# Keep only top five TD-derived gene pairs for demonstration purposes +td_pairs <- c_full$Scerevisiae[c_full$Scerevisiae$type == "TD", ] +gene_pairs <- list(Scerevisiae = td_pairs[seq(1, 5, by = 1), ]) # Calculate Ka, Ks, and Ka/Ks kaks <- pairs2kaks(gene_pairs, cds_list) @@ -630,21 +631,25 @@ type with the function `plot_duplicate_freqs()`. You can visualize frequencies in three different ways, as demonstrated below. ```{r} -# 1) Facets -plot_duplicate_freqs(counts_table) +# A) Facets +p1 <- plot_duplicate_freqs(counts_table) -# 2) Stacked barplot, absolute frequencies -plot_duplicate_freqs(counts_table, plot_type = "stack") +# B) Stacked barplot, absolute frequencies +p2 <- plot_duplicate_freqs(counts_table, plot_type = "stack") -# 3) Stacked barplot, relative frequencies -plot_duplicate_freqs(counts_table, plot_type = "stack_percent") +# C) Stacked barplot, relative frequencies +p3 <- plot_duplicate_freqs(counts_table, plot_type = "stack_percent") + +# Combine plots, one per row +patchwork::wrap_plots(p1, p2, p3, nrow = 3) + + patchwork::plot_annotation(tag_levels = "A") ``` If you want to visually the frequency of duplicated **genes** (not gene pairs), you'd first need to classify genes into unique modes of duplication with `classify_genes()`, and then repeat the code above. For example: -```{r} +```{r fig.height = 3, fig.width = 8} # Frequency of duplicated genes by mode classify_genes(fungi_kaks) |> # classify genes into unique duplication types duplicates2counts() |> # get a data frame of counts (long format) @@ -656,17 +661,21 @@ classify_genes(fungi_kaks) |> # classify genes into unique duplication types As briefly demonstrated before, to plot a $K_s$ distribution for the whole paranome, you will use the function `plot_ks_distro()`. -```{r} +```{r fig.height=3, fig.width=9} ks_df <- fungi_kaks$saccharomyces_cerevisiae -# 1) Histogram, whole paranome -plot_ks_distro(ks_df, plot_type = "histogram") +# A) Histogram, whole paranome +p1 <- plot_ks_distro(ks_df, plot_type = "histogram") + +# B) Density, whole paranome +p2 <- plot_ks_distro(ks_df, plot_type = "density") -# 2) Density, whole paranome -plot_ks_distro(ks_df, plot_type = "density") +# C) Histogram with density lines, whole paranome +p3 <- plot_ks_distro(ks_df, plot_type = "density_histogram") -# 3) Histogram with density lines, whole paranome -plot_ks_distro(ks_df, plot_type = "density_histogram") +# Combine plots side by side +patchwork::wrap_plots(p1, p2, p3, nrow = 1) + + patchwork::plot_annotation(tag_levels = "A") ``` However, visualizing the distribution for the whole paranome can mask patterns @@ -677,12 +686,16 @@ whether syntenic genes cluster together, suggesting the presence of WGD history. To visualize the distribution by duplication type, use `bytype = TRUE` in `plot_ks_distro()`. -```{r} -# 1) Duplicates by type, histogram -plot_ks_distro(ks_df, bytype = TRUE, plot_type = "histogram") +```{r fig.width = 8, fig.height=4} +# A) Duplicates by type, histogram +p1 <- plot_ks_distro(ks_df, bytype = TRUE, plot_type = "histogram") + +# B) Duplicates by type, violin +p2 <- plot_ks_distro(ks_df, bytype = TRUE, plot_type = "violin") -# 2) Duplicates by type, violin -plot_ks_distro(ks_df, bytype = TRUE, plot_type = "violin") +# Combine plots side by side +patchwork::wrap_plots(p1, p2) + + patchwork::plot_annotation(tag_levels = "A") ``` ## Visualizing substitution rates by species @@ -692,12 +705,16 @@ substitution rates ($K_s$, $K_a$, or their ratio $K_a/K_s$) by species. You can choose which rate you want to visualize, and whether or not to group gene pairs by duplication mode, as demonstrated below. -```{r} -# Ks for each species -plot_rates_by_species(fungi_kaks) +```{r fig.width = 6, fig.height = 4} +# A) Ks for each species +p1 <- plot_rates_by_species(fungi_kaks) + +# B) Ka/Ks by duplication type for each species +p2 <- plot_rates_by_species(fungi_kaks, rate_column = "Ka_Ks", bytype = TRUE) -# Ka/Ks by duplication type for each species -plot_rates_by_species(fungi_kaks, rate_column = "Ka_Ks", bytype = TRUE) +# Combine plots - one per row +patchwork::wrap_plots(p1, p2, nrow = 2) + + patchwork::plot_annotation(tag_levels = "A") ``` # Session information {.unnumbered}