Skip to content

Commit

Permalink
Add step-by-step guide and additional examples and checks resolves #7
Browse files Browse the repository at this point in the history
  • Loading branch information
zenalapp committed Nov 20, 2023
1 parent a6dbda5 commit c6ed007
Show file tree
Hide file tree
Showing 20 changed files with 319 additions and 43 deletions.
16 changes: 11 additions & 5 deletions R/bistro.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,10 @@ bistro <-
)

if (calc_allele_freqs) {
pop_allele_freqs <- calc_allele_freqs(human_profiles, rm_markers)
pop_allele_freqs <- calc_allele_freqs(human_profiles,
rm_markers,
check_inputs = FALSE
)
} else if (!is.null(rm_markers)) {
pop_allele_freqs <- pop_allele_freqs |>
dplyr::select(-dplyr::matches(paste0("^", toupper(rm_markers), "$")))
Expand All @@ -139,15 +142,17 @@ bistro <-
bloodmeal_profiles,
bloodmeal_ids,
peak_thresh,
rm_markers
rm_markers,
check_inputs = FALSE
)

message("Formatting human profiles")
human_profiles <- prep_human_profiles(
human_profiles,
human_ids,
rm_twins,
rm_markers
rm_markers,
check_inputs = FALSE
)

bm_markers <- bloodmeal_profiles$Marker
Expand Down Expand Up @@ -180,12 +185,13 @@ bistro <-
difftol,
threads,
seed,
time_limit
time_limit,
check_inputs = FALSE
)

message("Identifying matches")

matches <- identify_matches(log10_lrs, bloodmeal_ids)
matches <- identify_matches(log10_lrs, bloodmeal_ids, check_inputs = FALSE)

if (return_lrs) {
matches <- list(
Expand Down
17 changes: 12 additions & 5 deletions R/calc_allele_freqs.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,25 @@
#'
#' @description Calculate allele frequencies for a (generally human) population.
#'
#' @param check_inputs A boolean indicating whether or not to check the
#' inputs to the function. Default: TRUE
#' @inheritParams bistro
#'
#' @return A tibble where the first column is the STR allele and the following
#' columns are the frequency of that allele for different markers. Alleles
#' that do not exist for a given marker are coded as NA.
#'
#' @export
#' @keywords internal
calc_allele_freqs <- function(human_profiles, rm_markers = NULL) {
# check if expected columns are present
check_colnames(human_profiles, c("SampleName", "Marker", "Allele"))
check_ids(rm_markers, "rm_markers")
#' @examples
#' calc_allele_freqs(human_profiles)
calc_allele_freqs <- function(human_profiles,
rm_markers = NULL,
check_inputs = TRUE) {
if (check_inputs) {
# check if expected columns are present
check_colnames(human_profiles, c("SampleName", "Marker", "Allele"))
check_ids(rm_markers, "rm_markers")
}

if (!is.null(rm_markers)) {
human_profiles <- human_profiles |>
Expand Down
59 changes: 57 additions & 2 deletions R/calc_log10_lrs.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ calc_one_log10_lr <-
#' `prep_bloodmeal_profiles()` and `prep_human_profiles()` first.
#'
#' @inheritParams bistro
#' @inheritParams calc_allele_freqs
#'
#' @return A tibble with the same output as for [bistro()], except there is no
#' match column and every bloodmeal-human pair with the calculated log10_lr is
Expand All @@ -130,7 +131,16 @@ calc_one_log10_lr <-
#' value of 3.
#'
#' @export
#' @keywords internal
#' @examples
#' bm_profs <- prep_bloodmeal_profiles(bloodmeal_profiles, peak_thresh = 200)
#' hu_profs <- prep_human_profiles(human_profiles)
#' log10_lrs <- calc_log10_lrs(bm_profs,
#' hu_profs,
#' bloodmeal_ids = "evid1",
#' pop_allele_freqs = pop_allele_freqs,
#' kit = "ESX17",
#' peak_thresh = 200
#' )
calc_log10_lrs <-
function(bloodmeal_profiles,
human_profiles,
Expand All @@ -145,7 +155,52 @@ calc_log10_lrs <-
difftol = 1,
threads = 4,
seed = 1,
time_limit = 3) {
time_limit = 3,
check_inputs = TRUE) {
if (check_inputs) {
check_colnames(
bloodmeal_profiles,
c("SampleName", "Marker", "Allele", "Height")
)
check_colnames(human_profiles, c("SampleName", "Marker", "Allele"))
check_ids(bloodmeal_ids, "bloodmeal_ids")
check_ids(human_ids, "human_ids")

kit_df <- check_kit(kit)

bm_prof_markers <- bloodmeal_profiles$Marker |>
unique() |>
toupper()
hu_prof_markers <- human_profiles$Marker |>
unique() |>
toupper()
kit_markers <- kit_df$Marker |>
unique() |>
toupper()

check_setdiff_markers(
bm_prof_markers,
kit_markers,
"bloodmeal_profiles",
"kit"
)
check_setdiff_markers(
hu_prof_markers,
kit_markers,
"human_profiles",
"kit"
)

check_peak_thresh(peak_thresh)
check_is_bool(model_degrad, "model_degrad")
check_is_bool(model_bw_stutt, "model_bw_stutt")
check_is_bool(model_fw_stutt, "model_fw_stutt")
check_is_numeric(difftol, "difftol", pos = TRUE)
check_is_numeric(threads, "threads", pos = TRUE)
check_is_numeric(seed, "seed")
check_is_numeric(time_limit, "time_limit", pos = TRUE)
}

if (is.null(bloodmeal_ids)) {
bloodmeal_ids <- unique(bloodmeal_profiles$SampleName)
}
Expand Down
5 changes: 4 additions & 1 deletion R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,10 @@ check_heights <- function(heights, peak_thresh) {
#' @return number of alleles in a complete profile
#'
#' @keywords internal
check_create_db_input <- function(bloodmeal_profiles, kit, peak_thresh, rm_markers) {
check_create_db_input <- function(bloodmeal_profiles,
kit,
peak_thresh,
rm_markers) {
check_colnames(
bloodmeal_profiles,
c("SampleName", "Marker", "Allele")
Expand Down
23 changes: 18 additions & 5 deletions R/create_db_from_bloodmeals.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@
#' @inheritParams bistro
#'
#' @return Human database created from complete single-source bloodmeals.
#' Complete is defined as the number of markers in the kit minus the number of markers in `rm_markers`.
#' Complete is defined as the number of markers in the kit minus the
#' number of markers in `rm_markers`.
#' @export
#'
#' @examples
#' \dontrun{
#' # load example data
#' path_to_data <- paste0("https://raw.githubusercontent.com/duke-malaria-collaboratory/",
#' "bistro_validation/main/data/provedit/provedit_samples_mass200thresh.csv")
#' path_to_data <- paste0(
#' "https://raw.githubusercontent.com/duke-malaria-collaboratory/",
#' "bistro_validation/main/data/provedit/provedit_samples_mass200thresh.csv"
#' )
#' samples <- readr::read_csv(path_to_data)
#' create_db_from_bloodmeals(samples, kit = "identifiler", peak_thresh = 200)
#' }
Expand All @@ -20,7 +23,12 @@ create_db_from_bloodmeals <- function(bloodmeal_profiles,
peak_thresh,
rm_markers = c("AMEL")) {
check_pkg_version("tidyr", utils::packageVersion("tidyr"), "1.3.0")
n_markers_in_kit <- check_create_db_input(bloodmeal_profiles, kit, peak_thresh, rm_markers)
n_markers_in_kit <- check_create_db_input(
bloodmeal_profiles,
kit,
peak_thresh,
rm_markers
)
bloodmeal_profiles |>
filter_peaks(peak_thresh = peak_thresh) |>
rm_markers(markers = rm_markers) |>
Expand All @@ -37,7 +45,12 @@ create_db_from_bloodmeals <- function(bloodmeal_profiles,
dplyr::mutate(SampleName = paste0("H", as.numeric(factor(SampleName)))) |>
dplyr::arrange(Marker, Allele) |>
dplyr::group_by(SampleName) |>
dplyr::summarize(all_alleles = stringr::str_c(paste0(Marker, "__", Allele), collapse = ";")) |>
dplyr::summarize(
all_alleles =
stringr::str_c(paste0(Marker, "__", Allele),
collapse = ";"
)
) |>
dplyr::group_by(all_alleles) |>
dplyr::summarize() |>
dplyr::mutate(SampleName = paste0("H", dplyr::row_number()), .before = 1) |>
Expand Down
29 changes: 27 additions & 2 deletions R/identify_matches.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,37 @@ identify_one_match_set <- function(log10_lrs, bloodmeal_id) {
#' @param log10_lrs Output from [calc_log10_lrs()] or from `bistro` with
#' `return_lrs = TRUE` (`lrs`: the second element in the list)
#' @inheritParams bistro
#' @inheritParams calc_allele_freqs
#'
#' @return A tibble with the same output as for [bistro()].
#'
#' @export
#' @keywords internal
identify_matches <- function(log10_lrs, bloodmeal_ids = NULL) {
#' @examples
#' bm_profs <- prep_bloodmeal_profiles(bloodmeal_profiles, peak_thresh = 200)
#' hu_profs <- prep_human_profiles(human_profiles)
#' log10_lrs <- calc_log10_lrs(bm_profs,
#' hu_profs,
#' bloodmeal_ids = "evid1",
#' pop_allele_freqs = pop_allele_freqs,
#' kit = "ESX17",
#' peak_thresh = 200
#' )
#' matches <- identify_matches(log10_lrs)
identify_matches <- function(log10_lrs,
bloodmeal_ids = NULL,
check_inputs = TRUE) {
if (check_inputs) {
check_colnames(
log10_lrs,
c(
"bloodmeal_id", "human_id",
"locus_count", "est_noc", "efm_noc",
"log10_lr", "notes"
)
)
check_ids(bloodmeal_ids, "bloodmeal_ids")
}

if (is.null(bloodmeal_ids)) {
bloodmeal_ids <- unique(log10_lrs$bloodmeal_id)
}
Expand Down
7 changes: 5 additions & 2 deletions R/match_exact.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,17 @@ match_exact <- function(bloodmeal_profiles,
bloodmeal_profiles,
bloodmeal_ids,
peak_thresh,
rm_markers = NULL
rm_markers = NULL,
check_heights = FALSE,
check_inputs = FALSE
)

human_profiles <- prep_human_profiles(
human_profiles,
human_ids,
rm_twins,
rm_markers = NULL
rm_markers = NULL,
check_inputs = FALSE
)

# human profiles
Expand Down
6 changes: 4 additions & 2 deletions R/match_similarity.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ match_similarity <- function(bloodmeal_profiles,
bloodmeal_ids,
peak_thresh,
rm_markers = rm_markers,
check_heights = FALSE
check_heights = FALSE,
check_inputs = FALSE
)

all_bm_ids <- unique(bloodmeal_profiles$SampleName)
Expand All @@ -68,7 +69,8 @@ match_similarity <- function(bloodmeal_profiles,
human_profiles,
human_ids,
rm_twins,
rm_markers = rm_markers
rm_markers = rm_markers,
check_inputs = FALSE
) |>
tidyr::drop_na()

Expand Down
30 changes: 26 additions & 4 deletions R/preprocess_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,28 @@
#' @param check_heights A boolean indicating whether to check if all peak
#' heights are below the threshold. Default: TRUE
#' @inheritParams bistro
#' @inheritParams calc_allele_freqs
#'
#' @return Dataframe with preprocessed bloodmeal profiles
#' @export
#' @keywords internal
#' @examples
#' prep_bloodmeal_profiles(bloodmeal_profiles, peak_thresh = 200)
prep_bloodmeal_profiles <- function(bloodmeal_profiles,
bloodmeal_ids = NULL,
peak_thresh = NULL,
rm_markers = c("AMEL"),
check_heights = TRUE) {
check_heights = TRUE,
check_inputs = TRUE) {
if (check_inputs) {
check_colnames(
bloodmeal_profiles,
c("SampleName", "Marker", "Allele", "Height")
)
check_ids(bloodmeal_ids, "bloodmeal_ids")
check_peak_thresh(peak_thresh)
check_is_bool(check_heights, "check_heights")
}

if (is.null(bloodmeal_ids)) {
bloodmeal_ids <- unique(bloodmeal_profiles$SampleName)
} else {
Expand Down Expand Up @@ -41,15 +54,24 @@ prep_bloodmeal_profiles <- function(bloodmeal_profiles,
#'
#' Removes duplicates and optionally twins, subsets ids
#'
#' @inheritParams calc_allele_freqs
#' @inheritParams bistro
#'
#' @return Dataframe with preprocessed human profiles
#' @export
#' @keywords internal
#' @examples
#' prep_human_profiles(human_profiles)
prep_human_profiles <- function(human_profiles,
human_ids = NULL,
rm_twins = TRUE,
rm_markers = c("AMEL")) {
rm_markers = c("AMEL"),
check_inputs = TRUE) {
if (check_inputs) {
check_colnames(human_profiles, c("SampleName", "Marker", "Allele"))
check_ids(human_ids, "bloodmeal_ids")
check_is_bool(rm_twins, "rm_twins")
}

if (rm_twins) {
human_profiles <- rm_twins(human_profiles)
}
Expand Down
2 changes: 2 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ reference:
Sub-functions used by `bistro()`
contents:
- calc_allele_freqs
- prep_bloodmeal_profiles
- prep_human_profiles
- rm_dups
- rm_twins
- filter_peaks
Expand Down
9 changes: 7 additions & 2 deletions man/calc_allele_freqs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit c6ed007

Please sign in to comment.