.

traversc · Jan 26, 2024 · e5f5ace · e5f5ace
1 parent c1f0572
commit e5f5ace
Show file tree

Hide file tree

Showing 5 changed files with 21 additions and 16 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,9 +17,9 @@ SystemRequirements: GNU make
 LinkingTo: 
     Rcpp, RcppParallel, BH
 Imports: 
-    Rcpp (>= 0.12.18.3), RcppParallel (>= 5.1.3), R6
+    Rcpp (>= 0.12.18.3), RcppParallel (>= 5.1.3), R6, rlang, dplyr, stringi
 Suggests:
-    knitr, rmarkdown, stringdist, qs, dplyr, Biostrings, igraph, ggplot2, stringi
+    knitr, rmarkdown, stringdist, qs, Biostrings, igraph, ggplot2
 VignetteBuilder: knitr
 RoxygenNote: 7.2.3
 Roxygen: list(markdown = TRUE)

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,10 +1,12 @@
 importFrom(Rcpp,sourceCpp)
 importFrom(RcppParallel, RcppParallelLibs)
 importFrom(R6, R6Class)
+importFrom(rlang, .data)
 useDynLib(seqtrie, .registration=TRUE)
 export("RadixTree")
 export("RadixForest")
 export("dist_matrix")
 export("dist_pairwise")
 export("dist_search")
+export("split_search")
 export("generate_cost_matrix")
diff --git a/R/RadixTree_search_helpers.R b/R/RadixTree_search_helpers.R
@@ -64,8 +64,10 @@ dist_search <- function(query, target, max_distance = NULL, max_fraction = NULL,
 #' # target1 AAGACCTAA CC
 #' # query2   GGGTGTAA CCACCC
 #' # target2   GGTGTAA CCAC
-#' # Despite having different frames, query1 and query2 and clearly match to target1 and target2, respectively.
-#' # One could consider splitting based on a common core sequence, e.g. a common TAA stop codon. 
+#' # Despite having different frames, query1 and query2 clearly
+#' # match to target1 and target2, respectively.
+#' # One could consider splitting based on a common core sequence, 
+#' # e.g. a common TAA stop codon. 
 #' split_search(query=c(  "AGACCTAACCC", "GGGTGTAACCACCC"),
 #'              target=c("AAGACCTAACC",   "GGTGTAACCAC"),
 #'              query_split=c(8, 8),
@@ -89,9 +91,9 @@ split_search <- function(query, target, query_split, target_split, edge_trim = 0
 
   # Search for similar sequences between lefts and rights
   left_matches <- left_tree$search(unique(query_left), max_distance = max_distance, mode = "anchored", ...)
-  left_matches <- dplyr::rename(left_matches, query_left=query, target_left=target)
+  left_matches <- dplyr::rename(left_matches, query_left=.data$query, target_left=.data$target)
   right_matches <- right_tree$search(unique(query_right), max_distance = max_distance, mode = "anchored", ...)
-  right_matches <- dplyr::rename(right_matches, query_right=query, target_right=target)
+  right_matches <- dplyr::rename(right_matches, query_right=.data$query, target_right=.data$target)
 
   # If either left or right finds no matches, return empty dataframe
   if(nrow(left_matches) == 0 || nrow(right_matches) == 0) {
@@ -101,9 +103,9 @@ split_search <- function(query, target, query_split, target_split, edge_trim = 0
   # construct map of full sequence to left and right
   # keep only potential matches, i.e. queries or targets that appear in both the left_matches and right_matches data frames
   df_query <- data.frame(query, query_left, query_right)
-  df_query <- dplyr::filter(df_query, query_left %in% left_matches$query_left, query_right %in% right_matches$query_right)
+  df_query <- dplyr::filter(df_query, .data$query_left %in% left_matches$query_left, .data$query_right %in% right_matches$query_right)
   df_target <- data.frame(target, target_left, target_right)
-  df_target <- dplyr::filter(df_target, target_left %in% left_matches$target_left, target_right %in% right_matches$target_right)
+  df_target <- dplyr::filter(df_target, .data$target_left %in% left_matches$target_left, .data$target_right %in% right_matches$target_right)
 
   # Join results together, append full query and target sequences to left and right matches
   left_matches <- dplyr::inner_join(left_matches, df_query, by = "query_left")
@@ -112,8 +114,7 @@ split_search <- function(query, target, query_split, target_split, edge_trim = 0
   right_matches <- dplyr::inner_join(right_matches, df_target, by = "target_right")
 
   results <- dplyr::inner_join(left_matches, right_matches, by = c("query", "target"), suffix=c(".left", ".right"))
-  results <- dplyr::mutate(results, distance = distance.left + distance.right)
-  results <- dplyr::filter(results, distance <= max_distance)
-  results <- dplyr::select(results, query, target, distance)
-  as.data.frame(results)
+  results <- dplyr::mutate(results, distance = .data$distance.left + .data$distance.right)
+  results <- dplyr::filter(results, .data$distance <= max_distance)
+  as.data.frame(results[c("query", "target", "distance")])
 }
diff --git a/man/split_search.Rd b/man/split_search.Rd
diff --git a/vignettes/vignette.rmd b/vignettes/vignette.rmd
@@ -18,10 +18,10 @@ knitr::opts_chunk$set(dpi=96,fig.width=6.5)
 ### Basic usage
 
 ```{r, basic_usage, eval=FALSE}
-results <- dist_search(strings1, strings2, max_distance=2, nthreads = 1)
+results <- dist_search(x, y, max_distance = 2, nthreads = 1)
 ```
 
-The above code will find all similar sequences between `strings1` and `strings2`. 
+The above code will find all similar sequences/strings between `x` and `y`. 
 This will generally be significantly faster than calculating pairwise distance or
 pairwise alignment.