Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
traversc committed Sep 6, 2023
1 parent 8431b88 commit 292fffa
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions R/RadixTree_search_helpers.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,47 @@
#' @title Distance search for similar sequences
#' @description Find similar sequences within a distance threshold
#' @param query `r rdoc("query")`
#' @param target `r rdoc("target")`
#' @param max_distance `r rdoc("max_distance")`
#' @param max_fraction `r rdoc("max_fraction")`
#' @param mode `r rdoc("mode")`
#' @param cost_matrix `r rdoc("cost_matrix")`
#' @param gap_cost `r rdoc("gap_cost")`
#' @param gap_open_cost `r rdoc("gap_open_cost")`
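#' @param tree_class Which R6 class to use. Either "RadixTree" or "RadixForest" (default: "RadixTree"). When "RadixForest" is used, `cost_matrix`, `gap_cost` and `gap_open_cost` are not supported and must be left as `NULL`.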
#' @param nthreads `r rdoc("nthreads")`
#' @param show_progress `r rdoc("show_progress")`
#' @details
#' This function finds all sequences in _target_ that are within a distance threshold of any sequence in _query_.
#' This function uses either a RadixTree or RadixForest to store _target_ sequences. See the R6 class documentation for additional details.
#'
#' `r rdoc("details")`
#' @return The output is a data.frame of all matches with columns "query" and "target".
#' For anchored searches, the output also includes attributes "query_size" and "target_size"
#' which are vectors containing the portion of the query and target sequences that are aligned.
#' @examples
#' dist_search(c("ACGT", "AAAA"), c("ACG", "ACGT"), max_distance = 1, mode = "levenshtein")
#' @name dist_search
dist_search <- function(query, target, max_distance = NULL, max_fraction = NULL, mode = "levenshtein",
                        cost_matrix = NULL, gap_cost = NULL, gap_open_cost = NULL, tree_class = "RadixTree",
                        nthreads = 1, show_progress = FALSE) {
  # Builds a throwaway tree of the requested class from `target`, then searches
  # it with `query`; the search result is returned as-is.

  # Check the length before the membership test: `%in%` on a zero-length or
  # multi-element `tree_class` would hand `if` a condition that is not length 1
  # and fail with an unrelated R error instead of the message below.
  valid_classes <- c("RadixTree", "RadixForest")
  if (length(tree_class) != 1L || !tree_class %in% valid_classes) {
    stop("tree_class must be one of RadixTree or RadixForest")
  }

  if (tree_class == "RadixTree") {
    obj <- RadixTree$new()
    obj$insert(target)
    obj$search(query, max_distance, max_fraction, mode, cost_matrix, gap_cost, gap_open_cost, nthreads, show_progress)
  } else {
    # tree_class is "RadixForest" here (validated above). Its search call below
    # takes no cost arguments, so reject them before building anything.
    if (!is.null(cost_matrix) || !is.null(gap_cost) || !is.null(gap_open_cost)) {
      stop("cost_matrix, gap_cost and gap_open_cost are not supported for RadixForest")
    }
    obj <- RadixForest$new()
    obj$insert(target)
    obj$search(query, max_distance, max_fraction, mode, nthreads, show_progress)
  }
}

#' @title split_search
#' @description Search for similar sequences based on splitting sequences into left and right sides
#' and searching for matches in each side using a bi-directional anchored alignment.
Expand Down

0 comments on commit 292fffa

Please sign in to comment.