Skip to content

Commit

Permalink
Modified pairs2kaks() to use indices2kaks() and make it faster
Browse files Browse the repository at this point in the history
  • Loading branch information
almeidasilvaf committed Jul 25, 2024
1 parent d0f690b commit b67e2e4
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 47 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/check-bioc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,16 @@ jobs:
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-

- name: Cache R packages on Linux
if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' "
uses: actions/cache@v2
with:
path: /home/runner/work/_temp/Library
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-

- name: Install Linux system dependencies
if: runner.os == 'Linux'
Expand Down
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ biocViews:
Encoding: UTF-8
LazyData: false
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Imports:
syntenet,
GenomicRanges,
Expand All @@ -59,8 +59,7 @@ Imports:
stats,
utils,
AnnotationDbi,
GenomicFeatures,
BiocParallel
GenomicFeatures
Depends:
R (>= 4.2.0)
Suggests:
Expand Down
4 changes: 1 addition & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ export(plot_ks_peaks)
export(plot_rates_by_species)
export(split_pairs_by_peak)
importFrom(AnnotationDbi,select)
importFrom(BiocParallel,SerialParam)
importFrom(BiocParallel,bplapply)
importFrom(Biostrings,width)
importFrom(GenomicFeatures,intronsByTranscript)
importFrom(GenomicRanges,GRangesList)
importFrom(MSA2dist,dnastring2kaks)
importFrom(MSA2dist,indices2kaks)
importFrom(ggplot2,aes)
importFrom(ggplot2,after_stat)
importFrom(ggplot2,element_blank)
Expand Down
60 changes: 31 additions & 29 deletions R/ka_ks_analyses.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@
#' Possible values are "Li", "NG86", "NG", "LWL", "LPB", "MLWL", "MLPB", "GY",
#' "YN", "MYN", "MS", "MA", "GNG", "GLWL", "GLPB", "GMLWL", "GMLPB", "GYN",
#' and "GMYN". Default: "MYN".
#' @param bp_param BiocParallel back-end to be used.
#' Default: `BiocParallel::SerialParam()`.
#' @param threads Numeric indicating the number of threads to use. Default: 1.
#'
#' @return A list of data frames containing gene pairs and their Ka, Ks,
#' and Ka/Ks values.
#' @importFrom MSA2dist dnastring2kaks
#'
#' @importFrom MSA2dist indices2kaks
#' @importFrom Biostrings width
#' @importFrom BiocParallel SerialParam bplapply
#' @export
#' @rdname pairs2kaks
#' @examples
Expand All @@ -42,10 +41,8 @@
#' cds <- list(Scerevisiae = cds_scerevisiae)
#'
#' kaks <- pairs2kaks(gene_pairs_list, cds)
pairs2kaks <- function(
gene_pairs_list, cds, model = "MYN",
bp_param = BiocParallel::SerialParam()
) {
#'
pairs2kaks <- function(gene_pairs_list, cds, model = "MYN", threads = 1) {

kaks_list <- lapply(seq_along(gene_pairs_list), function(x) {

Expand All @@ -70,40 +67,45 @@ pairs2kaks <- function(
fcds <- fcds[-remove]
}

# Calculate Ka, Ks, and Ka/Ks for each gene pair
seq_list <- lapply(seq_len(nrow(pairs)), function(y) {
return(fcds[as.character(pairs[y, c(1, 2)])])
})
kaks <- BiocParallel::bplapply(seq_list, function(z, model) {

rates <- MSA2dist::dnastring2kaks(
z, model = model, isMSA = FALSE, verbose = FALSE
)
rates <- data.frame(
dup1 = rates$seq1,
dup2 = rates$seq2,
Ka = as.numeric(ifelse(rates$Ka == "NA", NA, rates$Ka)),
Ks = as.numeric(ifelse(rates$Ks == "NA", NA, rates$Ks)),
Ka_Ks = as.numeric(ifelse(rates[["Ka/Ks"]] == "NA", NA, rates[["Ka/Ks"]]))
# Create a list of indices - vectors of length 2
idx_df <- data.frame(gene = names(fcds), idx = seq_along(fcds))
pairs_idx <- lapply(seq_len(nrow(pairs)), function(p) {
idx <- c(
idx_df$idx[idx_df$gene == pairs[p, 1]],
idx_df$idx[idx_df$gene == pairs[p, 2]]
)

return(rates)
}, BPPARAM = bp_param, model = model)
kaks <- Reduce(rbind, kaks)
return(idx)
})

# Calculate rates
rates <- MSA2dist::indices2kaks(
cds = fcds, indices = pairs_idx,
model = model,
threads = threads,
isMSA = FALSE,
verbose = FALSE
)

rates <- data.frame(
dup1 = rates$seq1,
dup2 = rates$seq2,
Ka = as.numeric(ifelse(rates$Ka == "NA", NA, rates$Ka)),
Ks = as.numeric(ifelse(rates$Ks == "NA", NA, rates$Ks)),
Ka_Ks = as.numeric(ifelse(rates[["Ka/Ks"]] == "NA", NA, rates[["Ka/Ks"]]))
)
if("type" %in% names(pairs)) {
kaks$type <- pairs$type
rates$type <- pairs$type
}

return(kaks)
return(rates)
})
names(kaks_list) <- names(gene_pairs_list)

return(kaks_list)
}



#' Find peaks in a Ks distribution with Gaussian Mixture Models
#'
#' @param ks A numeric vector of Ks values.
Expand Down
11 changes: 3 additions & 8 deletions man/pairs2kaks.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b67e2e4

Please sign in to comment.