diff --git a/R/calc_allele_freqs.R b/R/calc_allele_freqs.R index 1e4813c..1edc246 100644 --- a/R/calc_allele_freqs.R +++ b/R/calc_allele_freqs.R @@ -16,10 +16,10 @@ calc_allele_freqs <- function(human_profiles, rm_markers = NULL, check_inputs = TRUE) { + check_is_bool(check_inputs) if (check_inputs) { - # check if expected columns are present check_colnames(human_profiles, c("SampleName", "Marker", "Allele")) - check_ids(rm_markers, "rm_markers") + check_present(rm_markers, human_profiles, "Marker") } if (!is.null(rm_markers)) { diff --git a/R/calc_log10_lrs.R b/R/calc_log10_lrs.R index 67e3919..dfaf7b4 100644 --- a/R/calc_log10_lrs.R +++ b/R/calc_log10_lrs.R @@ -157,50 +157,47 @@ calc_log10_lrs <- seed = NULL, time_limit = 3, check_inputs = TRUE) { + check_is_bool(check_inputs) if (check_inputs) { check_colnames( bloodmeal_profiles, c("SampleName", "Marker", "Allele", "Height") ) check_colnames(human_profiles, c("SampleName", "Marker", "Allele")) - check_ids(bloodmeal_ids, "bloodmeal_ids") - check_ids(human_ids, "human_ids") + check_present(bloodmeal_ids, bloodmeal_profiles, "SampleName") + check_present(human_ids, human_profiles, "SampleName") - kit_df <- check_kit(kit) - - bm_prof_markers <- bloodmeal_profiles$Marker |> - unique() |> - toupper() - hu_prof_markers <- human_profiles$Marker |> - unique() |> - toupper() - kit_markers <- kit_df$Marker |> + kit_markers <- check_kit(kit)$Marker |> unique() |> toupper() check_setdiff_markers( - bm_prof_markers, + bloodmeal_profiles$Marker |> + unique() |> + toupper(), kit_markers, "bloodmeal_profiles", "kit" ) check_setdiff_markers( - hu_prof_markers, + human_profiles$Marker |> + unique() |> + toupper(), kit_markers, "human_profiles", "kit" ) check_peak_thresh(peak_thresh) - check_is_bool(model_degrad, "model_degrad") - check_is_bool(model_bw_stutt, "model_bw_stutt") - check_is_bool(model_fw_stutt, "model_fw_stutt") - check_is_numeric(difftol, "difftol", pos = TRUE) - check_is_numeric(threads, "threads", pos = TRUE) - if(!is.null(seed)){ - check_is_numeric(seed, "seed") + check_is_bool(model_degrad) + check_is_bool(model_bw_stutt) + check_is_bool(model_fw_stutt) + check_is_numeric(difftol, pos = TRUE) + check_is_numeric(threads, pos = TRUE) + if (!is.null(seed)) { + check_is_numeric(seed) } - check_is_numeric(time_limit, "time_limit", pos = TRUE) + check_is_numeric(time_limit, pos = TRUE) } if (is.null(bloodmeal_ids)) { diff --git a/R/checks.R b/R/checks.R index d726597..0fdb33a 100644 --- a/R/checks.R +++ b/R/checks.R @@ -28,9 +28,10 @@ check_bistro_inputs <- c("SampleName", "Marker", "Allele", "Height") ) check_colnames(human_profiles, c("SampleName", "Marker", "Allele")) - check_ids(bloodmeal_ids, "bloodmeal_ids") - check_ids(human_ids, "human_ids") - check_ids(rm_markers) + check_present(bloodmeal_ids, bloodmeal_profiles, "SampleName") + check_present(human_ids, human_profiles, "SampleName") + check_present(rm_markers, human_profiles, "Marker") + check_present(rm_markers, bloodmeal_profiles, "Marker") kit_df <- check_kit(kit) @@ -68,17 +69,17 @@ check_bistro_inputs <- check_if_allele_freqs(pop_allele_freqs, calc_allele_freqs, kit_df) check_peak_thresh(peak_thresh) - check_is_bool(rm_twins, "rm_twins") - check_is_bool(model_degrad, "model_degrad") - check_is_bool(model_bw_stutt, "model_bw_stutt") - check_is_bool(model_fw_stutt, "model_fw_stutt") - check_is_bool(return_lrs, "return_lrs") - check_is_numeric(difftol, "difftol", pos = TRUE) - check_is_numeric(threads, "threads", pos = TRUE) - if(!is.null(seed)){ - check_is_numeric(seed, "seed") + check_is_bool(rm_twins) + check_is_bool(model_degrad) + check_is_bool(model_bw_stutt) + check_is_bool(model_fw_stutt) + check_is_bool(return_lrs) + check_is_numeric(difftol, pos = TRUE) + check_is_numeric(threads, pos = TRUE) + if (!is.null(seed)) { + check_is_numeric(seed) } - check_is_numeric(time_limit, "time_limit", pos = TRUE) + check_is_numeric(time_limit, pos = TRUE) } #' Check is boolean @@ -88,10 +89,10 @@ check_bistro_inputs <- #' #' @return Error or nothing #' @keywords internal -check_is_bool <- function(vec, vec_name) { +check_is_bool <- function(vec) { if (!is.logical(vec)) { stop( - vec_name, + deparse(substitute(vec)), " must be a logical (TRUE or FALSE), but is ", class(vec), "." @@ -107,11 +108,11 @@ check_is_bool <- function(vec, vec_name) { #' #' @return Error or nothing #' @keywords internal -check_is_numeric <- function(vec, vec_name, pos = FALSE) { +check_is_numeric <- function(vec, pos = FALSE) { if (!is.numeric(vec)) { - stop(vec_name, " must be numeric, but is ", class(vec), ".") + stop(deparse(substitute(vec)), " must be numeric, but is ", class(vec), ".") } else if (vec <= 0 && pos == TRUE) { - stop(vec_name, " must be greater than zero, but is ", vec, ".") + stop(deparse(substitute(vec)), " must be greater than zero, but is ", vec, ".") } } @@ -240,9 +241,9 @@ check_setdiff_markers <- #' #' @return Error or nothing #' @keywords internal -check_ids <- function(vec, vec_name) { +check_ids <- function(vec) { if (!is.null(vec) && !is.vector(vec)) { - stop(vec_name, " must be NULL or a vector but is: ", class(vec)) + stop(deparse(substitute(vec)), " must be NULL or a vector but is: ", class(vec)) } } @@ -260,7 +261,7 @@ check_colnames <- function(df, expected_colnames) { expected_colnames[!expected_colnames %in% names(df)] if (length(missing_colnames) > 0) { stop(paste0( - "Not all expected column names are present. Missing: ", + "Not all expected column names are present in ", deparse(substitute(df)), ". Missing: ", paste0(missing_colnames, collapse = ", ") )) } @@ -309,7 +310,7 @@ check_create_db_input <- function(bloodmeal_profiles, bloodmeal_profiles, c("SampleName", "Marker", "Allele") ) - check_ids(rm_markers) + check_present(rm_markers, bloodmeal_profiles, "Marker") check_peak_thresh(peak_thresh) kit_df <- check_kit(kit) kit_markers <- kit_df$Marker |> @@ -331,3 +332,21 @@ check_create_db_input <- function(bloodmeal_profiles, ) length(kit_markers) } + +#' Check if input markers to remove are present in the dataset +#' +#' @param df Data frame to check against +#' @inheritParams bistro +#' +#' @return Warning or nothing +#' @keywords internal +check_present <- function(to_rm, df, col) { + check_ids(to_rm) + not_present <- setdiff(toupper(to_rm), toupper(unlist(df[, col]))) + if (length(not_present) > 0) { + warning( + "These are not present in ", deparse(substitute(df)), "[,", deparse(substitute(col)), "]: ", + not_present + ) + } +} diff --git a/R/identify_matches.R b/R/identify_matches.R index 5151e6e..602fa8c 100644 --- a/R/identify_matches.R +++ b/R/identify_matches.R @@ -161,7 +161,7 @@ identify_matches <- function(log10_lrs, "log10_lr", "notes" ) ) - check_ids(bloodmeal_ids, "bloodmeal_ids") + check_present(bloodmeal_ids, log10_lrs, "bloodmeal_id") } if (is.null(bloodmeal_ids)) { diff --git a/R/match_exact.R b/R/match_exact.R index 0d31efa..771ea7b 100644 --- a/R/match_exact.R +++ b/R/match_exact.R @@ -32,10 +32,11 @@ match_exact <- function(bloodmeal_profiles, ) } check_colnames(human_profiles, c("SampleName", "Marker", "Allele")) - check_ids(bloodmeal_ids, "bloodmeal_ids") - check_ids(human_ids, "human_ids") - check_is_bool(rm_twins, "rm_twins") - check_ids(rm_markers, "rm_markers") + check_present(bloodmeal_ids, bloodmeal_profiles, "SampleName") + check_present(human_ids, human_profiles, "SampleName") + check_is_bool(rm_twins) + check_present(rm_markers, bloodmeal_profiles, "SampleName") + check_present(rm_markers, human_profiles, "SampleName") bloodmeal_profiles <- prep_bloodmeal_profiles( bloodmeal_profiles, diff --git a/R/match_similarity.R b/R/match_similarity.R index 3b10ba1..f6992c9 100644 --- a/R/match_similarity.R +++ b/R/match_similarity.R @@ -45,11 +45,12 @@ match_similarity <- function(bloodmeal_profiles, ) } check_colnames(human_profiles, c("SampleName", "Marker", "Allele")) - check_ids(bloodmeal_ids, "bloodmeal_ids") - check_ids(human_ids, "human_ids") - check_is_bool(rm_twins, "rm_twins") - check_ids(rm_markers, "rm_markers") - check_is_bool(return_similarities, "return_similarities") + check_present(bloodmeal_ids, bloodmeal_profiles, "SampleName") + check_present(human_ids, human_profiles, "SampleName") + check_is_bool(rm_twins) + check_present(rm_markers, bloodmeal_profiles, "SampleName") + check_present(rm_markers, human_profiles, "SampleName") + check_is_bool(return_similarities) bloodmeal_profiles <- prep_bloodmeal_profiles( bloodmeal_profiles, diff --git a/R/match_static_thresh.R b/R/match_static_thresh.R index fa79893..ac3674c 100644 --- a/R/match_static_thresh.R +++ b/R/match_static_thresh.R @@ -15,6 +15,14 @@ #' ) #' match_static_thresh(bistro_output$lrs, 10) match_static_thresh <- function(log10_lrs, thresh) { + check_colnames( + log10_lrs, + c( + "bloodmeal_id", "human_id", + "locus_count", "est_noc", "efm_noc", + "log10_lr", "notes" + ) + ) check_is_numeric(thresh, pos = TRUE) bm_info <- log10_lrs |> diff --git a/R/preprocess_data.R b/R/preprocess_data.R index ba4d842..4f9a147 100644 --- a/R/preprocess_data.R +++ b/R/preprocess_data.R @@ -17,14 +17,16 @@ prep_bloodmeal_profiles <- function(bloodmeal_profiles, rm_markers = c("AMEL"), check_heights = TRUE, check_inputs = TRUE) { + check_is_bool(check_inputs) if (check_inputs) { check_colnames( bloodmeal_profiles, c("SampleName", "Marker", "Allele", "Height") ) - check_ids(bloodmeal_ids, "bloodmeal_ids") + check_present(bloodmeal_ids, bloodmeal_profiles, "SampleName") check_peak_thresh(peak_thresh) - check_is_bool(check_heights, "check_heights") + check_present(rm_markers, bloodmeal_profiles, "Marker") + check_is_bool(check_heights) } if (is.null(bloodmeal_ids)) { @@ -66,10 +68,12 @@ prep_human_profiles <- function(human_profiles, rm_twins = TRUE, rm_markers = c("AMEL"), check_inputs = TRUE) { + check_is_bool(check_inputs) if (check_inputs) { check_colnames(human_profiles, c("SampleName", "Marker", "Allele")) - check_ids(human_ids, "bloodmeal_ids") - check_is_bool(rm_twins, "rm_twins") + check_present(human_ids, human_profiles, "SampleName") + check_is_bool(rm_twins) + check_present(rm_markers, human_profiles) } if (rm_twins) { diff --git a/README.Rmd b/README.Rmd index 12d0d1b..531afaa 100644 --- a/README.Rmd +++ b/README.Rmd @@ -61,3 +61,8 @@ deps <- lapply( ## Usage Check out the [vignette](https://duke-malaria-collaboratory.github.io/bistro/articles/bistro.html) for more information. + +## Have questions or need help troubleshooting? + +Open up an [issue](https://github.com/duke-malaria-collaboratory/bistro/issues) on our GitHub page or contact us (Christine: christine.markwalter@duke.edu, Zena: zenalapp@gmail.com) and we can help out. + diff --git a/README.md b/README.md index c1ceefd..1ab63c0 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,18 @@ remotes::install_github("duke-malaria-collaboratory/bistro") - Imports: codetools (\>= 0.2.19), dplyr (\>= 1.1.3), R.utils (\>= 2.12.2), stringr (\>= 1.5.0), tibble (\>= 3.2.1), tidyr (\>= 1.3.0) - Suggests: knitr (\>= 1.43), readr (\>= 2.1.4), rmarkdown (\>= 2.24), - testthat (\>= 3.0.0) + testthat (\>= 3.2.0) ## Usage Check out the [vignette](https://duke-malaria-collaboratory.github.io/bistro/articles/bistro.html) for more information. + +## Have questions or need help troubleshooting? + +Open up an +[issue](https://github.com/duke-malaria-collaboratory/bistro/issues) on +our GitHub page or contact us (Christine: +, Zena: ) and we can +help out. diff --git a/man/check_ids.Rd b/man/check_ids.Rd index 1794cc0..ea32452 100644 --- a/man/check_ids.Rd +++ b/man/check_ids.Rd @@ -4,7 +4,7 @@ \alias{check_ids} \title{Check input ids} \usage{ -check_ids(vec, vec_name) +check_ids(vec) } \arguments{ \item{vec}{vector to check} diff --git a/man/check_is_bool.Rd b/man/check_is_bool.Rd index 30fab42..ee389c5 100644 --- a/man/check_is_bool.Rd +++ b/man/check_is_bool.Rd @@ -4,7 +4,7 @@ \alias{check_is_bool} \title{Check is boolean} \usage{ -check_is_bool(vec, vec_name) +check_is_bool(vec) } \arguments{ \item{vec}{vector to check} diff --git a/man/check_is_numeric.Rd b/man/check_is_numeric.Rd index 768ad07..0788174 100644 --- a/man/check_is_numeric.Rd +++ b/man/check_is_numeric.Rd @@ -4,14 +4,14 @@ \alias{check_is_numeric} \title{Check is numeric} \usage{ -check_is_numeric(vec, vec_name, pos = FALSE) +check_is_numeric(vec, pos = FALSE) } \arguments{ \item{vec}{vector to check} -\item{vec_name}{vector name} - \item{pos}{whether the number must be positive} + +\item{vec_name}{vector name} } \value{ Error or nothing diff --git a/man/check_present.Rd b/man/check_present.Rd new file mode 100644 index 0000000..0460a95 --- /dev/null +++ b/man/check_present.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_present} +\alias{check_present} +\title{Check if input markers to remove are present in the dataset} +\usage{ +check_present(to_rm, df, col) +} +\arguments{ +\item{df}{Data frame to check against} +} +\value{ +Warning or nothing +} +\description{ +Check if input markers to remove are present in the dataset +} +\keyword{internal} diff --git a/tests/testthat/test-calc_allele_freqs.R b/tests/testthat/test-calc_allele_freqs.R index 3326655..978f76f 100644 --- a/tests/testthat/test-calc_allele_freqs.R +++ b/tests/testthat/test-calc_allele_freqs.R @@ -1,7 +1,7 @@ test_that("calc_allele_freqs works", { expect_error( calc_allele_freqs(data.frame(name = 1)), - "Not all expected column names are present. Missing:" + "Not all expected column names are present in human_profiles. Missing:" ) hu_prof_sub <- diff --git a/tests/testthat/test-calc_log10_lrs.R b/tests/testthat/test-calc_log10_lrs.R index af64ba2..99f0844 100644 --- a/tests/testthat/test-calc_log10_lrs.R +++ b/tests/testthat/test-calc_log10_lrs.R @@ -92,9 +92,6 @@ test_that("calc_log10_lrs works", { pop_allele_freqs = pop_allele_freqs, kit = "ESX17", peak_thresh = 200, - # seed = 1, check_inputs = TRUE ))) - - }) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 01d36d5..8524638 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -10,24 +10,29 @@ test_that("check_bistro_input works", { }) test_that("check_is_bool works", { - expect_no_error(check_is_bool(TRUE, "v1")) + expect_no_error(check_is_bool(TRUE)) expect_error( - check_is_bool(1, "v1"), - "v1 must be a logical" + check_is_bool(1), + "1 must be a logical" ) }) test_that("check_is_numeric works", { - expect_no_error(check_is_numeric(1, "v1")) - expect_no_error(check_is_numeric(-1, "v1")) - expect_no_error(check_is_numeric(1, "v1", pos = TRUE)) + expect_no_error(check_is_numeric(1)) + expect_no_error(check_is_numeric(-1)) + expect_no_error(check_is_numeric(1, pos = TRUE)) expect_error( - check_is_numeric(0, "v1", pos = TRUE), - "v1 must be greater than zero, but is 0." + check_is_numeric(0, pos = TRUE), + "0 must be greater than zero, but is 0." ) expect_error( - check_is_numeric("a", "v1"), - "v1 must be numeric, but is character." + check_is_numeric("a"), + "must be numeric, but is character." + ) + var <- "a" + expect_error( + check_is_numeric(var), + "var must be numeric, but is character." ) }) @@ -80,13 +85,14 @@ test_that("check_if_allele_freqs works", { }) test_that("check_ids works", { - expect_no_error(check_ids(NULL, "v1")) - expect_no_error(check_ids(1, "v1")) - expect_no_error(check_ids(1:2, "v1")) - expect_no_error(check_ids("a", "v1")) + expect_no_error(check_ids(NULL)) + expect_no_error(check_ids(1)) + expect_no_error(check_ids(1:2)) + expect_no_error(check_ids("a")) + tib <- tibble::tibble(test = numeric()) expect_error( - check_ids(tibble::tibble(test = numeric()), "v1"), - "v1 must be NULL or a vector but is: tbl_dftbldata.frame" + check_ids(tib), + "tib must be NULL or a vector but is: tbl_dftbldata.frame" ) }) @@ -95,7 +101,7 @@ test_that("check_colnames works", { expect_error( check_colnames(tibble::tibble(test = 1), "test1"), - "Not all expected column names are present. Missing: test1" + "Not all expected column names are present in" ) }) @@ -140,3 +146,11 @@ test_that("check_create_db_input working", { "ESX17", 0, c("AMEL") ), 16) }) + +test_that("check_present working", { + expect_no_error(check_present(c("amel"), human_profiles, "Marker")) + expect_warning( + check_present(c("a"), human_profiles, "Marker"), + "These are not present in human_profiles" + ) +}) diff --git a/tests/testthat/test-preprocess_data.R b/tests/testthat/test-preprocess_data.R index b381c06..fe55bec 100644 --- a/tests/testthat/test-preprocess_data.R +++ b/tests/testthat/test-preprocess_data.R @@ -1,11 +1,11 @@ test_that("prep_bloodmeal_profiles works", { - expect_error( + expect_warning(expect_error( prep_bloodmeal_profiles( bloodmeal_profiles %>% dplyr::filter(Height < 200), peak_thresh = 200 ), "All bloodmeal peak heights below threshold of 200." - ) + )) expect_snapshot(prep_bloodmeal_profiles( bloodmeal_profiles, diff --git a/vignettes/bistro.Rmd b/vignettes/bistro.Rmd index ac36b3c..1c09ed3 100644 --- a/vignettes/bistro.Rmd +++ b/vignettes/bistro.Rmd @@ -269,3 +269,8 @@ bistro(samples, hu_db, "identifiler", 200, ## Running bistro step-by-step If you're interested in learning how to run sub-functions of `bistro()` individually, head over to `vignette("step-by-step")`. + + +## Have questions or need help troubleshooting? + +Open up an [issue](https://github.com/duke-malaria-collaboratory/bistro/issues) on our GitHub page or contact us (Christine: christine.markwalter@duke.edu, Zena: zenalapp@gmail.com) and we can help out.