Skip to content

Commit

Permalink
Added a check for gene ID matching between gene pairs and CDS + updat…
Browse files Browse the repository at this point in the history
…ed vignette accordingly
  • Loading branch information
almeidasilvaf committed Oct 2, 2024
1 parent 0e3c5f0 commit 7765ce7
Show file tree
Hide file tree
Showing 15 changed files with 136 additions and 278 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,5 @@ importFrom(stats,density)
importFrom(stats,dnorm)
importFrom(syntenet,interspecies_synteny)
importFrom(syntenet,intraspecies_synteny)
importFrom(utils,head)
importFrom(utils,read.table)
1 change: 1 addition & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
#' \item{dup1}{Character, duplicated gene 1.}
#' \item{dup2}{Character, duplicated gene 2.}
#' \item{Ks}{Numeric, Ks values.}
#' \item{type}{Factor, duplication mode.}
#' }
#' @examples
#' data(gmax_ks)
Expand Down
37 changes: 37 additions & 0 deletions R/data_validation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

#' Check if gene names in set 1 are present in set 2
#'
#' @param ref_ids Character vector of reference gene set.
#' @param test_ids Character vector of test gene set.
#' @param setnames Character vector of length 2 with set names
#' (reference set first, test set second), used only to build the
#' error message.
#' Default: \code{c("gene pairs", "CDS")}
#'
#' @return TRUE if every ID in \strong{ref_ids} is present in
#' \strong{test_ids} (trivially TRUE when \strong{ref_ids} is empty);
#' otherwise an error is shown, listing up to 5 nonmatching IDs
#' and up to 5 IDs from \strong{test_ids}.
#' @importFrom utils head
#' @details
#' This internal function can be used, for instance, to check if CDS names
#' match gene IDs in the gene pair list.
#' @noRd
check_geneid_match <- function(
    ref_ids, test_ids, setnames = c("gene pairs", "CDS")
) {

    mismatch_ids <- ref_ids[!ref_ids %in% test_ids]
    n_mismatch <- length(mismatch_ids)

    # Decide on the raw count, not on the rounded percentage: with large
    # ID sets a handful of mismatches rounds to 0% and would go unnoticed.
    # Using the count also avoids 0 / 0 = NaN when `ref_ids` is empty.
    if (n_mismatch > 0) {
        mismatch_perc <- round(n_mismatch / length(ref_ids) * 100, 2)

        # Do not report a misleading "0%" when the fraction is tiny
        perc_label <- if (mismatch_perc > 0) mismatch_perc else "<0.01"

        stop(
            perc_label, "%", " (N=", n_mismatch, ") of the IDs in ",
            setnames[1], " were not found in ", setnames[2], ".\n",
            "All gene IDs in ", setnames[1], " must be in ", setnames[2],
            ". Did you check if gene IDs match?",
            "\n\nHere are some examples of nonmatching IDs (from ",
            setnames[1], ") :\n",
            paste0(head(mismatch_ids, n = 5), collapse = "\n"),
            "\n\nAnd here are some examples of IDs in ", setnames[2], ":\n",
            paste0(head(test_ids, n = 5), collapse = "\n")
        )
    }

    return(TRUE)
}
14 changes: 12 additions & 2 deletions R/ka_ks_analyses.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#' "YN", "MYN", "MS", "MA", "GNG", "GLWL", "GLPB", "GMLWL", "GMLPB", "GYN",
#' and "GMYN". Default: "MYN".
#' @param threads Numeric indicating the number of threads to use. Default: 1.
#' @param verbose Logical indicating whether progress messages should be
#' printed on screen. Default: FALSE.
#'
#' @return A list of data frames containing gene pairs and their Ka, Ks,
#' and Ka/Ks values.
Expand Down Expand Up @@ -42,16 +44,24 @@
#'
#' kaks <- pairs2kaks(gene_pairs_list, cds)
#'
pairs2kaks <- function(gene_pairs_list, cds, model = "MYN", threads = 1) {
pairs2kaks <- function(
gene_pairs_list, cds, model = "MYN", threads = 1, verbose = FALSE
) {

kaks_list <- lapply(seq_along(gene_pairs_list), function(x) {

# Get pairs for species x
# Get pairs and CDS for species x
species <- names(gene_pairs_list)[x]
if(verbose) { message("Calculating rates for species '", species, "'") }

pairs <- gene_pairs_list[[x]]
names(pairs)[c(1, 2)] <- c("dup1", "dup2")
pairs$dup1 <- gsub("^[a-zA-Z]{2,5}_", "", pairs$dup1)
pairs$dup2 <- gsub("^[a-zA-Z]{2,5}_", "", pairs$dup2)
fcds <- cds[[species]]

# Check if IDs in pairs are all present in CDS
c1 <- check_geneid_match(unique(c(pairs$dup1, pairs$dup2)), names(fcds))

# Remove CDS that are not multiple of 3
fcds <- cds[[species]]
Expand Down
Binary file modified data/gmax_ks.rda
Binary file not shown.
72 changes: 0 additions & 72 deletions dev/01_create_pkg.R

This file was deleted.

50 changes: 0 additions & 50 deletions dev/02_git_github_setup.R

This file was deleted.

108 changes: 0 additions & 108 deletions dev/03_core_files.R

This file was deleted.

31 changes: 0 additions & 31 deletions dev/04_update.R

This file was deleted.

Loading

0 comments on commit 7765ce7

Please sign in to comment.