From 5675cc07d58fa8d153cc12d1a3fdf3a120b26877 Mon Sep 17 00:00:00 2001 From: Marcelo Mollinari Date: Mon, 12 Feb 2024 23:58:22 -0500 Subject: [PATCH] update documentation --- DESCRIPTION | 2 +- NAMESPACE | 31 - R/RcppExports.R | 4 - R/calc_genoprob.R | 3 - R/est_map_hmm.R | 2282 +++++++++--------- R/filters.R | 23 +- R/get_counts.R | 6 - R/get_counts_from_web.R | 3 - R/haplotype_map_utils.R | 6 - R/make_seq.R | 726 +++--- R/pairwise_rf.R | 39 +- R/plot_map_list.R | 2 - R/prior_dist_hmm.R | 1 - R/read_mappoly_csv.R | 3 - R/reest_map_error.R | 2 - R/rf_list_to_matrix.R | 2 - R/simulation_utils.R | 6 - R/single_map_hmm.R | 29 - R/single_paprent_single_phase_hmm.R | 5 - R/utils.R | 222 +- man/add_mrk_at_tail_ph_list.Rd | 3 - man/aggregate_matrix.Rd | 3 - man/calc_genoprob_haplo.Rd | 3 - man/cat_phase.Rd | 3 - man/check_data_dist_sanity.Rd | 3 - man/check_data_dose_sanity.Rd | 3 - man/check_if_rf_is_possible.Rd | 3 - man/check_ls_phase.Rd | 3 - man/concatenate_ph_list.Rd | 3 - man/create_map.Rd | 3 - man/draw_cross.Rd | 3 - man/est_haplo_hmm.Rd | 3 - man/est_map_haplo_given_genoprob.Rd | 3 - man/est_pairwise_rf.Rd | 28 +- man/est_rf_hmm_single_phase.Rd | 30 - man/est_rf_hmm_single_phase_single_parent.Rd | 6 - man/filter_map_at_hmm_thres.Rd | 14 +- man/filter_missing.Rd | 19 +- man/filter_non_conforming_classes.Rd | 3 - man/format_rf.Rd | 3 - man/genetic-mapping-functions.Rd | 54 + man/genotyping_global_error.Rd | 3 - man/get_cache_two_pts_from_web.Rd | 3 - man/get_counts.Rd | 3 - man/get_counts_all_phases.Rd | 3 - man/get_counts_two_parents.Rd | 3 - man/get_dosage_type.Rd | 18 +- man/get_full_info_tail.Rd | 3 - man/get_ols_map.Rd | 3 - man/get_ph_list_subset.Rd | 3 - man/get_rf_from_mat.Rd | 3 - man/get_states_and_emission_single_parent.Rd | 3 - man/get_w_m.Rd | 3 - man/gg_color_hue.Rd | 3 - man/imf_h.Rd | 15 - man/imf_k.Rd | 15 - man/imf_m.Rd | 15 - man/is.prob.data.Rd | 13 +- man/make_seq_mappoly.Rd | 102 +- man/mappoly-color-palettes.Rd | 40 + man/mf_h.Rd | 15 - man/mf_k.Rd | 15 - man/mf_m.Rd | 15 - man/mp_pallet1.Rd | 15 - man/mp_pallet2.Rd | 15 - man/mp_pallet3.Rd | 15 - man/mrk_chisq_test.Rd | 3 - man/msg.Rd | 3 - man/paralell_pairwise_discrete.Rd | 3 - man/paralell_pairwise_discrete_rcpp.Rd | 3 - man/paralell_pairwise_probability.Rd | 3 - man/perm_pars.Rd | 3 - man/perm_tot.Rd | 3 - man/plot_compare_haplotypes.Rd | 29 +- man/plot_one_map.Rd | 3 - man/poly_hmm_est.Rd | 3 - man/prepare_map.Rd | 3 - man/print_ph.Rd | 3 - man/sample_data.Rd | 4 +- man/select_rf.Rd | 3 - man/sim_cross_one_informative_parent.Rd | 3 - man/sim_cross_two_informative_parents.Rd | 3 - man/split_mappoly.Rd | 3 - man/table_to_mappoly.Rd | 3 - man/update_ph_list_at_hmm_thres.Rd | 3 - man/v_2_m.Rd | 3 - src/RcppExports.cpp | 32 +- src/read_mappoly_vcf.cpp | 4 - 88 files changed, 1877 insertions(+), 2183 deletions(-) create mode 100644 man/genetic-mapping-functions.Rd delete mode 100644 man/imf_h.Rd delete mode 100644 man/imf_k.Rd delete mode 100644 man/imf_m.Rd create mode 100644 man/mappoly-color-palettes.Rd delete mode 100644 man/mf_h.Rd delete mode 100644 man/mf_k.Rd delete mode 100644 man/mf_m.Rd delete mode 100644 man/mp_pallet1.Rd delete mode 100644 man/mp_pallet2.Rd delete mode 100644 man/mp_pallet3.Rd diff --git a/DESCRIPTION b/DESCRIPTION index f4f33452..09f34a47 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -94,7 +94,7 @@ Imports: zoo, plotly LinkingTo: Rcpp, RcppParallel -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 SystemRequirements: GNU make Encoding: UTF-8 Suggests: diff --git a/NAMESPACE b/NAMESPACE index e69bc9b1..b146f3bf 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,14 +28,7 @@ S3method(print,mappoly.sequence) S3method(print,mappoly.twopt) S3method(print,mappoly.unique.seq) S3method(print,two.pts.linkage.phases) -export(.mappoly_data_skeleton) -export(.mappoly_map_skeleton) -export(.vcf_get_depth) -export(.vcf_get_ploidy) -export(.vcf_get_probabilities) -export(.vcf_transform_dosage) export(add_marker) -export(aggregate_matrix) export(cache_counts_twopt) export(calc_genoprob) export(calc_genoprob_dist) @@ -44,10 +37,8 @@ export(calc_genoprob_single_parent) export(calc_homologprob) export(calc_prefpair_profiles) export(check_data_sanity) -export(check_if_rf_is_possible) export(compare_haplotypes) export(compare_maps) -export(create_map) export(cross_simulate) export(detect_info_par) export(dist_prob_to_class) @@ -60,8 +51,6 @@ export(est_pairwise_rf) export(est_pairwise_rf2) export(est_rf_hmm) export(est_rf_hmm_sequential) -export(est_rf_hmm_single_phase) -export(est_rf_hmm_single_phase_single_parent) export(export_data_to_polymapR) export(export_map_list) export(export_qtlpoly) @@ -70,25 +59,15 @@ export(filter_aneuploid) export(filter_individuals) export(filter_map_at_hmm_thres) export(filter_missing) -export(filter_non_conforming_classes) export(filter_segregation) export(find_blocks) export(framework_map) export(generate_all_link_phases_elim_equivalent_haplo) export(get_LOD) -export(get_cache_two_pts_from_web) export(get_dosage_type) export(get_genomic_order) -export(get_ols_map) -export(get_rf_from_mat) -export(get_states_and_emission_single_parent) export(get_submap) -export(get_w_m) -export(gg_color_hue) export(group_mappoly) -export(imf_h) -export(imf_k) -export(imf_m) export(import_data_from_polymapR) export(import_from_updog) export(import_phased_maplist_from_polymapR) @@ -101,18 +80,11 @@ export(make_seq_mappoly) export(mds_mappoly) export(merge_datasets) export(merge_maps) -export(mf_h) export(mf_k) -export(mf_m) export(mp_pallet1) export(mp_pallet2) export(mp_pallet3) -export(paralell_pairwise_discrete) -export(paralell_pairwise_discrete_rcpp) -export(paralell_pairwise_probability) export(parallel_block) -export(perm_pars) -export(perm_tot) export(ph_list_to_matrix) export(ph_matrix_to_list) export(plot_GIC) @@ -136,13 +108,10 @@ export(sample_data) export(segreg_poly) export(sim_homologous) export(split_and_rephase) -export(split_mappoly) export(summary_maps) -export(table_to_mappoly) export(update_framework_map) export(update_map) export(update_missing) -export(v_2_m) import(RCurl) import(Rcpp) import(fields) diff --git a/R/RcppExports.R b/R/RcppExports.R index 78f35b01..da90effa 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,22 +1,18 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -#' @export .vcf_get_probabilities <- function(mat, pl_pos) { .Call('_mappoly_vcf_get_probabilities', PACKAGE = 'mappoly', mat, pl_pos) } -#' @export .vcf_transform_dosage <- function(mat, gt_pos) { .Call('_mappoly_vcf_transform_dosage', PACKAGE = 'mappoly', mat, gt_pos) } -#' @export .vcf_get_ploidy <- function(mat, gt_pos) { .Call('_mappoly_vcf_get_ploidy', PACKAGE = 'mappoly', mat, gt_pos) } -#' @export .vcf_get_depth <- function(mat, dp_pos) { .Call('_mappoly_vcf_get_depth', PACKAGE = 'mappoly', mat, dp_pos) } diff --git a/R/calc_genoprob.R b/R/calc_genoprob.R index 2635f0f1..84c1c503 100755 --- a/R/calc_genoprob.R +++ b/R/calc_genoprob.R @@ -127,10 +127,7 @@ cat("\n No. genotypic classes: ", dim(x$probs)[1], "\n") } #' Create a map with pseudomarkers at a given step -#' -#' @param void internal function to be documented #' @keywords internal -#' @export create_map <- function(input.map, step = 0, phase.config = "best") { diff --git a/R/est_map_hmm.R b/R/est_map_hmm.R index 48bbc4a4..1067273d 100755 --- a/R/est_map_hmm.R +++ b/R/est_map_hmm.R @@ -1,1139 +1,1143 @@ -#' Multipoint analysis using Hidden Markov Models in autopolyploids -#' -#' Performs the multipoint analysis proposed by \cite{Mollinari and -#' Garcia (2019)} in a sequence of markers -#' -#' This function first enumerates a set of linkage phase configurations -#' based on two-point recombination fraction information using a threshold -#' provided by the user (argument \code{thresh}). After that, for each -#' configuration, it reconstructs the genetic map using the -#' HMM approach described in Mollinari and Garcia (2019). As result, it returns -#' the multipoint likelihood for each configuration in form of LOD Score comparing -#' each configuration to the most likely one. It is recommended to use a small number -#' of markers (e.g. 50 markers for hexaploids) since the possible linkage -#' phase combinations bounded only by the two-point information can be huge. -#' Also, it can be quite sensible to small changes in \code{'thresh'}. -#' For a large number of markers, please see \code{\link[mappoly]{est_rf_hmm_sequential}}. -# -#' @param input.seq an object of class \code{mappoly.sequence} -#' -#' @param input.ph an object of class \code{two.pts.linkage.phases}. -#' If not available (default = NULL), it will be computed -#' -#' @param thres LOD Score threshold used to determine if the linkage phases -#' compared via two-point analysis should be considered. Smaller -#' values will result in smaller number of linkage phase -#' configurations to be evaluated by the multipoint algorithm. -#' -#' @param twopt an object of class \code{mappoly.twopt} -#' containing two-point information -#' -#' @param verbose if \code{TRUE}, current progress is shown; if -#' \code{FALSE} (default), no output is produced -#' -#' @param tol the desired accuracy (default = 1e-04) -#' -#' @param est.given.0.rf logical. If TRUE returns a map forcing all -#' recombination fractions equals to 0 (1e-5, for internal use only. -#' Default = FALSE) -#' -#' @param reestimate.single.ph.configuration logical. If \code{TRUE} -#' returns a map without re-estimating the map parameters for cases -#' where there is only one possible linkage phase configuration. -#' This argument is intended to be used in a sequential map construction -#' -#' @param high.prec logical. If \code{TRUE} (default) uses high precision -#' long double numbers in the HMM procedure -#' -#' @param x an object of the class \code{mappoly.map} -#' -#' @param detailed logical. if TRUE, prints the linkage phase configuration and the marker -#' position for all maps. If FALSE (default), prints a map summary -#' -#' @param left.lim the left limit of the plot (in cM, default = 0). -#' -#' @param right.lim the right limit of the plot (in cM, default = Inf, i.e., -#' will print the entire map) -#' -#' @param phase logical. If \code{TRUE} (default) plots the phase configuration -#' for both parents -#' -#' @param mrk.names if TRUE, marker names are displayed (default = FALSE) -#' -#' @param cex The magnification to be used for marker names -#' -#' @param config should be \code{'best'} or the position of the -#' configuration to be plotted. If \code{'best'}, plot the configuration -#' with the highest likelihood -#' -#' @param P a string containing the name of parent P -#' -#' @param Q a string containing the name of parent Q -#' -#' @param xlim range of the x-axis. If \code{xlim = NULL} (default) it uses the -#' map range. -#' -#' @param ... currently ignored -#' -#' @return A list of class \code{mappoly.map} with two elements: -#' -#' i) info: a list containing information about the map, regardless of the linkage phase configuration: -#' \item{ploidy}{the ploidy level} -#' \item{n.mrk}{number of markers} -#' \item{seq.num}{a vector containing the (ordered) indices of markers in the map, -#' according to the input file} -#' \item{mrk.names}{the names of markers in the map} -#' \item{seq.dose.p1}{a vector containing the dosage in parent 1 for all markers in the map} -#' \item{seq.dose.p2}{a vector containing the dosage in parent 2 for all markers in the map} -#' \item{chrom}{a vector indicating the sequence (usually chromosome) each marker belongs -#' as informed in the input file. If not available, -#' \code{chrom = NULL}} -#' \item{genome.pos}{physical position (usually in megabase) of the markers into the sequence} -#' \item{seq.ref}{reference base used for each marker (i.e. A, T, C, G). If not available, -#' \code{seq.ref = NULL}} -#' \item{seq.alt}{alternative base used for each marker (i.e. A, T, C, G). If not available, -#' \code{seq.ref = NULL}} -#' \item{chisq.pval}{a vector containing p-values of the chi-squared test of Mendelian -#' segregation for all markers in the map} -#' \item{data.name}{name of the dataset of class \code{mappoly.data}} -#' \item{ph.thres}{the LOD threshold used to define the linkage phase configurations to test} -#' -#' ii) a list of maps with possible linkage phase configuration. Each map in the list is also a -#' list containing -#' \item{seq.num}{a vector containing the (ordered) indices of markers in the map, -#' according to the input file} -#' \item{seq.rf}{a vector of size (\code{n.mrk - 1}) containing a sequence of recombination -#' fraction between the adjacent markers in the map} -#' \item{seq.ph}{linkage phase configuration for all markers in both parents} -#' \item{loglike}{the hmm-based multipoint likelihood} -#' -#' @examples -#' mrk.subset <- make_seq_mappoly(hexafake, 1:10) -#' red.mrk <- elim_redundant(mrk.subset) -#' unique.mrks <- make_seq_mappoly(red.mrk) -#' subset.pairs <- est_pairwise_rf(input.seq = unique.mrks, -#' ncpus = 1, -#' verbose = TRUE) -#' -#' ## Estimating subset map with a low tolerance for the E.M. procedure -#' ## for CRAN testing purposes -#' subset.map <- est_rf_hmm(input.seq = unique.mrks, -#' thres = 2, -#' twopt = subset.pairs, -#' verbose = TRUE, -#' tol = 0.1, -#' est.given.0.rf = FALSE) -#' subset.map -#' ## linkage phase configuration with highest likelihood -#' plot(subset.map, mrk.names = TRUE, config = "best") -#' ## the second one -#' plot(subset.map, mrk.names = TRUE, config = 2) -#' -#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} -#' -#' @references -#' Mollinari, M., and Garcia, A. A. F. (2019) Linkage -#' analysis and haplotype phasing in experimental autopolyploid -#' populations with high ploidy level using hidden Markov -#' models, _G3: Genes, Genomes, Genetics_. -#' https://doi.org/10.1534/g3.119.400378 -#' @rdname est_rf_hmm -#' @export est_rf_hmm -est_rf_hmm <- function(input.seq, - input.ph = NULL, - thres = 0.5, - twopt = NULL, - verbose = FALSE, - tol = 1e-04, - est.given.0.rf = FALSE, - reestimate.single.ph.configuration = TRUE, - high.prec = TRUE) { - - ## checking for correct object - input_classes <- c("mappoly.sequence", "two.pts.linkage.phases") - if (!inherits(input.seq, input_classes[1])) { - stop(deparse(substitute(input.seq)), " is not an object of class 'mappoly.sequence'") - } - if(length(input.seq$seq.num) == 1) - stop("Input sequence contains only one marker.", call. = FALSE) - if (is.null(input.ph)) { - if (is.null(twopt)) - stop("Two-point analysis not found!") - if (verbose) - cat("\nListing all configurations under threshold", thres, "using two-point information...\n") - input.ph <- ls_linkage_phases(input.seq = input.seq, thres = thres, twopt = twopt) - } - if (!inherits(input.ph, input_classes[2])) { - stop(deparse(substitute(input.ph)), " is not an object of class 'two.pts.linkage.phases'") - } - info.par <-detect_info_par(input.seq) - if(length(input.seq$seq.num) == 2 & !est.given.0.rf){ - maps <- vector("list", 1) - res.temp <- twopt$pairwise[[paste(sort(input.seq$seq.num), collapse = "-")]] - dp <- get(input.seq$data.name, pos = 1)$dosage.p1[input.seq$seq.num] - dq <- get(input.seq$data.name, pos = 1)$dosage.p2[input.seq$seq.num] - sh <- as.numeric(unlist(strsplit(rownames(res.temp)[1], split = "-"))) - res.ph <- list(P = c(list(0),list(0)), Q = c(list(0),list(0))) - if(dp[1] != 0) - res.ph$P[[1]] <- 1:dp[1] - if(dq[1] != 0) - res.ph$Q[[1]] <- 1:dq[1] - if(dp[2] != 0) - { - v <- (rev(res.ph$P[[1]])[1]+1):(dp[2]+rev(res.ph$P[[1]])[1]) - res.ph$P[[2]] <- v-sh[1] - } - if(dq[2] != 0) - { - v <- (rev(res.ph$Q[[1]])[1]+1):(dq[2]+rev(res.ph$Q[[1]])[1]) - res.ph$Q[[2]] <- v-sh[2] - } - names(res.ph$P) <- names(res.ph$Q) <- input.seq$seq.num - maps[[1]] <- list(seq.num = input.seq$seq.num, - seq.rf = res.temp[1,"rf"], - seq.ph = res.ph, - loglike = 0) - return(structure(list(info = list(ploidy = input.seq$ploidy, - n.mrk = length(input.seq$seq.num), - seq.num = input.seq$seq.num, - mrk.names = input.seq$seq.mrk.names, - seq.dose.p1 = get(input.seq$data.name, pos = 1)$dosage.p1[input.seq$seq.mrk.names], - seq.dose.p2 = get(input.seq$data.name, pos = 1)$dosage.p2[input.seq$seq.mrk.names], - chrom = get(input.seq$data.name, pos = 1)$chrom[input.seq$seq.mrk.names], - genome.pos = get(input.seq$data.name, pos = 1)$genome.pos[input.seq$seq.mrk.names], - seq.ref = get(input.seq$data.name, pos = 1)$seq.ref[input.seq$seq.mrk.names], - seq.alt = get(input.seq$data.name, pos = 1)$seq.alt[input.seq$seq.mrk.names], - chisq.pval = get(input.seq$data.name, pos = 1)$chisq.pval[input.seq$seq.mrk.names], - data.name = input.seq$data.name, - ph.thresh = abs(res.temp[2,1])), - maps = maps), - class = "mappoly.map")) - } - n.ph <- length(input.ph$config.to.test) - ret.map.no.rf.estimation <- FALSE - if(n.ph == 1 && !reestimate.single.ph.configuration) - ret.map.no.rf.estimation <- TRUE - maps <- vector("list", n.ph) - if (verbose){ - txt <- paste0(" ",n.ph, " phase(s): ") - cat(txt) - } - for (i in 1:n.ph) { - if (verbose) { - if((i+1)%%40 == 0) - cat("\n", paste0(rep(" ", nchar(txt)), collapse = ""), sep = "") - cat(". ") - } - if(est.given.0.rf){ - rf.temp <- rep(1e-5, length(input.seq$seq.num) - 1) - tol = 1 - } - else{ - rf.temp <- rep(0.001, length(input.seq$seq.num) - 1) - } - - ph <- list(P = input.ph$config.to.test[[i]]$P, - Q = input.ph$config.to.test[[i]]$Q) - - if(info.par == "both") - { - maps[[i]] <- est_rf_hmm_single_phase(input.seq = input.seq, - input.ph.single = ph, - rf.temp = rf.temp, - tol = tol, - verbose = FALSE, - ret.map.no.rf.estimation = ret.map.no.rf.estimation, - high.prec = high.prec) - } - else if (info.par == "p1") - { - maps[[i]] <- est_rf_hmm_single_phase_single_parent(input.seq, - input.ph.single = ph, - info.parent = 1, - uninfo.parent = 2, - rf.vec = rf.temp, - tol = tol, - verbose = FALSE, - ret.map.no.rf.estimation = ret.map.no.rf.estimation) - } - else if (info.par == "p2") - { - maps[[i]] <- est_rf_hmm_single_phase_single_parent(input.seq, - input.ph.single = ph, - info.parent = 2, - uninfo.parent = 1, - rf.vec = rf.temp, - tol = tol, - verbose = FALSE, - ret.map.no.rf.estimation = ret.map.no.rf.estimation) - - } - else stop("Should not get here.") - } - #cat("\n") - id <- order(sapply(maps, function(x) x$loglike), decreasing = TRUE) - maps <- maps[id] - if (verbose) - cat("\n") - structure(list(info = list(ploidy = input.seq$ploidy, - n.mrk = length(input.seq$seq.num), - seq.num = input.seq$seq.num, - mrk.names = input.seq$seq.mrk.names, - seq.dose.p1 = get(input.seq$data.name, pos = 1)$dosage.p1[input.seq$seq.mrk.names], - seq.dose.p2 = get(input.seq$data.name, pos = 1)$dosage.p2[input.seq$seq.mrk.names], - chrom = get(input.seq$data.name, pos = 1)$chrom[input.seq$seq.mrk.names], - genome.pos = get(input.seq$data.name, pos = 1)$genome.pos[input.seq$seq.mrk.names], - seq.ref = get(input.seq$data.name, pos = 1)$seq.ref[input.seq$seq.mrk.names], - seq.alt = get(input.seq$data.name, pos = 1)$seq.alt[input.seq$seq.mrk.names], - chisq.pval = get(input.seq$data.name, pos = 1)$chisq.pval[input.seq$seq.mrk.names], - data.name = input.seq$data.name, ph.thresh = input.ph$thres), - maps = maps), - class = "mappoly.map") -} - -#' Multipoint analysis using Hidden Markov Models: Sequential phase elimination -#' -#' Performs the multipoint analysis proposed by \cite{Mollinari and -#' Garcia (2019)} in a sequence of markers removing unlikely phases -#' using sequential multipoint information. -#' -#' This function sequentially includes markers into a map given an -#' ordered sequence. It uses two-point information to eliminate -#' unlikely linkage phase configurations given \code{thres.twopt}. The -#' search is made within a window of size \code{extend.tail}. For the -#' remaining configurations, the HMM-based likelihood is computed and -#' the ones that pass the HMM threshold (\code{thres.hmm}) are eliminated. -#' -#' @param input.seq an object of class \code{mappoly.sequence} -#' -#' @param twopt an object of class \code{mappoly.twopt} -#' containing the two-point information -#' -#' @param start.set number of markers to start the phasing procedure (default = 4) -#' -#' @param thres.twopt the LOD threshold used to determine if the linkage -#' phases compared via two-point analysis should be considered -#' for the search space reduction (A.K.A. \eqn{\eta} in -#' \cite{Mollinari and Garcia (2019)}, default = 5) -#' -#' @param thres.hmm the LOD threshold used to determine if the linkage -#' phases compared via hmm analysis should be evaluated in the -#' next round of marker inclusion (default = 50) -#' -#' @param extend.tail the length of the chain's tail that should -#' be used to calculate the likelihood of the map. If \code{NULL} (default), -#' the function uses all markers positioned. Even if \code{info.tail = TRUE}, -#' it uses at least \code{extend.tail} as the tail length -#' -#' @param phase.number.limit the maximum number of linkage phases of the sub-maps defined -#' by arguments \code{info.tail} and \code{extend.tail}. Default is 20. If the -#' size exceeds this limit, the marker will not be inserted. If -#' \code{Inf}, then it will insert all markers. -#' -#' @param sub.map.size.diff.limit the maximum accepted length -#' difference between the current and the previous sub-map defined -#' by arguments \code{info.tail} and \code{extend.tail}. If the -#' size exceeds this limit, the marker will not be inserted. If -#' \code{NULL}(default), then it will insert all markers. -#' -#' @param info.tail if \code{TRUE} (default), it uses the complete informative tail -#' of the chain (i.e. number of markers where all homologous -#' (\eqn{ploidy x 2}) can be distinguished) to calculate the map likelihood -#' -#'@param reestimate.single.ph.configuration logical. If \code{FALSE} (default) -#' returns a map without re-estimating the map parameters in cases -#' where there are only one possible linkage phase configuration -#' -#' @param tol the desired accuracy during the sequential phase (default = 10e-02) -#' -#' @param tol.final the desired accuracy for the final map (default = 10e-04) -#' -#' @param verbose If \code{TRUE} (default), current progress is shown; if -#' \code{FALSE}, no output is produced -#' -#' @param detailed.verbose If \code{TRUE}, the expansion of the current -#' submap is shown; -#' -#' @param high.prec logical. If \code{TRUE} uses high precision -#' (long double) numbers in the HMM procedure implemented in C++, -#' which can take a long time to perform (default = FALSE) -#' -#' @return A list of class \code{mappoly.map} with two elements: -#' -#' i) info: a list containing information about the map, regardless of the linkage phase configuration: -#' \item{ploidy}{the ploidy level} -#' \item{n.mrk}{number of markers} -#' \item{seq.num}{a vector containing the (ordered) indices of markers in the map, -#' according to the input file} -#' \item{mrk.names}{the names of markers in the map} -#' \item{seq.dose.p1}{a vector containing the dosage in parent 1 for all markers in the map} -#' \item{seq.dose.p2}{a vector containing the dosage in parent 2 for all markers in the map} -#' \item{chrom}{a vector indicating the sequence (usually chromosome) each marker belongs -#' as informed in the input file. If not available, -#' \code{chrom = NULL}} -#' \item{genome.pos}{physical position (usually in megabase) of the markers into the sequence} -#' \item{seq.ref}{reference base used for each marker (i.e. A, T, C, G). If not available, -#' \code{seq.ref = NULL}} -#' \item{seq.alt}{alternative base used for each marker (i.e. A, T, C, G). If not available, -#' \code{seq.ref = NULL}} -#' \item{chisq.pval}{a vector containing p-values of the chi-squared test of Mendelian -#' segregation for all markers in the map} -#' \item{data.name}{name of the dataset of class \code{mappoly.data}} -#' \item{ph.thres}{the LOD threshold used to define the linkage phase configurations to test} -#' -#' ii) a list of maps with possible linkage phase configuration. Each map in the list is also a -#' list containing -#' \item{seq.num}{a vector containing the (ordered) indices of markers in the map, -#' according to the input file} -#' \item{seq.rf}{a vector of size (\code{n.mrk - 1}) containing a sequence of recombination -#' fraction between the adjacent markers in the map} -#' \item{seq.ph}{linkage phase configuration for all markers in both parents} -#' \item{loglike}{the hmm-based multipoint likelihood} -#' -#' @examples -#' \donttest{ -#' mrk.subset <- make_seq_mappoly(hexafake, 1:20) -#' red.mrk <- elim_redundant(mrk.subset) -#' unique.mrks <- make_seq_mappoly(red.mrk) -#' subset.pairs <- est_pairwise_rf(input.seq = unique.mrks, -#' ncpus = 1, -#' verbose = TRUE) -#' subset.map <- est_rf_hmm_sequential(input.seq = unique.mrks, -#' thres.twopt = 5, -#' thres.hmm = 10, -#' extend.tail = 10, -#' tol = 0.1, -#' tol.final = 10e-3, -#' phase.number.limit = 5, -#' twopt = subset.pairs, -#' verbose = TRUE) -#' print(subset.map, detailed = TRUE) -#' plot(subset.map) -#' plot(subset.map, left.lim = 0, right.lim = 1, mrk.names = TRUE) -#' plot(subset.map, phase = FALSE) -#' -#' ## Retrieving simulated linkage phase -#' ph.P <- maps.hexafake[[1]]$maps[[1]]$seq.ph$P -#' ph.Q <- maps.hexafake[[1]]$maps[[1]]$seq.ph$Q -#' ## Estimated linkage phase -#' ph.P.est <- subset.map$maps[[1]]$seq.ph$P -#' ph.Q.est <- subset.map$maps[[1]]$seq.ph$Q -#' compare_haplotypes(ploidy = 6, h1 = ph.P[names(ph.P.est)], h2 = ph.P.est) -#' compare_haplotypes(ploidy = 6, h1 = ph.Q[names(ph.Q.est)], h2 = ph.Q.est) -#' } -#' -#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} -#' -#' @references -#' Mollinari, M., and Garcia, A. A. F. (2019) Linkage -#' analysis and haplotype phasing in experimental autopolyploid -#' populations with high ploidy level using hidden Markov -#' models, _G3: Genes, Genomes, Genetics_. -#' \doi{10.1534/g3.119.400378} -#' -#' @importFrom utils head -#' @importFrom cli rule -#' @export est_rf_hmm_sequential -est_rf_hmm_sequential <- function(input.seq, - twopt, - start.set = 4, - thres.twopt = 5, - thres.hmm = 50, - extend.tail = NULL, - phase.number.limit = 20, - sub.map.size.diff.limit = Inf, - info.tail = TRUE, - reestimate.single.ph.configuration = FALSE, - tol = 10e-2, - tol.final = 10e-4, - verbose = TRUE, - detailed.verbose = FALSE, - high.prec = FALSE) -{## checking for correct object - input_classes <- c("mappoly.sequence", "mappoly.twopt") - if (!inherits(input.seq, input_classes[1])) { - stop(deparse(substitute(input.seq)), - " is not an object of class 'mappoly.sequence'") - } - if (!inherits(twopt, input_classes[2])) { - stop(deparse(substitute(twopt)), - " is not an object of class 'mappoly.twopt'") - } - ## Information about the map - if(verbose) { - cli::cat_line("Number of markers: ", length(input.seq$seq.num)) - msg("Initial sequence", line = 2) - } - if(is.null(extend.tail)) - extend.tail <- length(input.seq$seq.num) - ##### Map in case of two markers ##### - if(length(input.seq$seq.num) == 2) - { - cur.map <- est_rf_hmm(input.seq = input.seq, thres = thres.twopt, - twopt = twopt, tol = tol.final, high.prec = high.prec, - verbose = verbose, - reestimate.single.ph.configuration = reestimate.single.ph.configuration) - return(filter_map_at_hmm_thres(cur.map, thres.hmm)) - } - ##### More than 3 markers #### - ##Starting sequential algorithm for maps with more than 3 markers - ## First step: test all possible phase configurations under - ## a 'thres.twopt' threshold for a sequence of size 'start.set' - if(start.set > length(input.seq$seq.num)) - start.set <- length(input.seq$seq.num) - if(verbose) cat(start.set, " markers...\n", sep = "") - cte <- 0 - rf.temp <- c(Inf, Inf) - while(any(rf.temp >= sub.map.size.diff.limit)) - { - cte <- cte+1 - if(length(rf.temp) == 1) { - ## cat("Impossible build a map using the given thresholds\n") - ## stop() - stop("Impossible build a map using the given thresholds\n") - } - if(verbose) cat(cli::symbol$bullet, " Trying sequence:", cte:(start.set+cte-1), ":\n") - cur.seq <- make_seq_mappoly(input.obj = get(input.seq$data.name, pos = 1), as.vector(na.omit(input.seq$seq.num[cte:(start.set+cte-1)])), data.name = input.seq$data.name) - input.ph <- ls_linkage_phases(input.seq = cur.seq, - thres = thres.twopt, - twopt = twopt) - cur.map <- est_rf_hmm(input.seq = cur.seq, thres = thres.twopt, - twopt = twopt, tol = tol, high.prec = high.prec, - verbose = verbose, input.ph = input.ph, - reestimate.single.ph.configuration = reestimate.single.ph.configuration) - cur.map <- filter_map_at_hmm_thres(cur.map, thres.hmm) - cur.map <- reest_rf(cur.map, tol = tol.final, verbose = FALSE) - rf.temp <- imf_h(cur.map$maps[[1]]$seq.rf) - } - if(verbose) - msg("Done with initial sequence", line = 2) - if(start.set >= length(input.seq$seq.num)){ - if(verbose) cat("\nDone phasing", cur.map$info$n.mrk, "markers\nReestimating final recombination fractions.") - return(reest_rf(cur.map, tol = tol.final)) - } - ##### More than 'start.set' markers ##### - ## For maps with more markers than 'start.set', include - ## next makers in a sequential fashion - ct <- start.set+cte - all.ph <- update_ph_list_at_hmm_thres(cur.map, thres.hmm) - if(sub.map.size.diff.limit != Inf & !reestimate.single.ph.configuration){ - if (verbose) message("Making 'reestimate.single.ph.configuration = TRUE' to use map expansion") - reestimate.single.ph.configuration <- TRUE - } - while(ct <= length(input.seq$seq.num)) - { - ## Number of multipoint phases evaluated in the - ## previous round of marker insertion - hmm.phase.number <- length(cur.map$maps) - ## Get the map tail containing sufficient markers to - ## distinguish among the ploidy*2 possible homologs. - if(info.tail) - tail.temp <- get_full_info_tail(input.obj = cur.map) - input.ph <- NULL - ## for each linkage phase inherited from HMM - ## analysis from previous round, evaluate the - ## possible linkage phases for the marker inserted - ## in the current round using two-point information - ## under the threshold of thres.twopt - for(i in 1:length(all.ph$config.to.test)) - { - seq.num <- NULL - if(info.tail) - seq.num <- tail.temp$maps[[i]]$seq.num - if(length(seq.num) < extend.tail) - seq.num <- tail(cur.map$maps[[i]]$seq.num, extend.tail) - ## If the tail do not contain the marker responsible for carrying - ## multiple linkage phases through the rounds of insertion, - ## extend the tail so it contains that marker - if(max(check_ls_phase(all.ph)) >= length(seq.num)){ - seq.num <- tail(cur.map$maps[[i]]$seq.num, max(check_ls_phase(all.ph)) + start.set) - } - seq.cur <- make_seq_mappoly(input.obj = get(input.seq$data.name, pos = 1), - arg = seq.num, - data.name = input.seq$data.name) - all.ph.temp <- get_ph_list_subset(all.ph, seq.num, i) - input.ph.temp <- ls_linkage_phases(input.seq = seq.cur, - thres = thres.twopt, - twopt = twopt, - mrk.to.add = input.seq$seq.num[ct], - prev.info = all.ph.temp) - input.ph <- concatenate_ph_list(ph.list.1 = input.ph, ph.list.2 = input.ph.temp) - } - ## This is the number of linkage phases to be tested - ## combining HMM phases from previous round of mrk - ## insertion and the linkage phases from the two-point - ## information obtained in the current round - twopt.phase.number <- length(input.ph$config.to.test) - ## If this number is higher than phase.number.limit, - ## proceed to the next iteration - if(length(input.ph$config.to.test) > phase.number.limit) { - if(verbose) - cat(crayon::italic$yellow(paste0(ct ,": not included (*linkage phases*)\n", sep = ""))) - ct <- ct + 1 - next() - } - ## Appending the marker to the numeric - ## sequence and making a new sequence - seq.num <- c(seq.num, input.seq$seq.num[ct]) - seq.cur <- make_seq_mappoly(input.obj = get(input.seq$data.name, pos = 1), - arg = seq.num, - data.name = input.seq$data.name) - if(verbose) - cat_phase(input.seq, input.ph, all.ph, ct, seq.num, twopt.phase.number, hmm.phase.number) - ## Evaluation all maps using HMM - cur.map.temp <- est_rf_hmm(input.seq = seq.cur, - input.ph = input.ph, - twopt = twopt, - tol = tol, - verbose = FALSE, - reestimate.single.ph.configuration = reestimate.single.ph.configuration, - high.prec = high.prec) - cur.map.temp$maps <- unique(cur.map.temp$maps) - ## Filtering linkage phase configurations under a HMM LOD threshold - cur.map.temp <- filter_map_at_hmm_thres(cur.map.temp, thres.hmm) - - ## Gathering linkage phases of the current map, excluding the marker inserted - ## in the current round - ph.new <- lapply(cur.map.temp$maps, function(x) list(P = head(x$seq.ph$P, -1), - Q = head(x$seq.ph$Q, -1))) - ## Gathering linkage phases of the previous map, excluding the marker inserted - ## in the current round - ph.old <- lapply(cur.map$maps, function(x, id) list(P = x$seq.ph$P[id], - Q = x$seq.ph$Q[id]), id = names(ph.new[[1]]$P)) - - ## Check in which whole phase configurations the new - ## HMM tail should be appended - MQ <- MP <- matrix(NA, length(ph.old), length(ph.new)) - for(j1 in 1:nrow(MP)){ - for(j2 in 1:ncol(MP)){ - MP[j1, j2] <- identical(ph.old[[j1]]$P, ph.new[[j2]]$P) - MQ[j1, j2] <- identical(ph.old[[j1]]$Q, ph.new[[j2]]$Q) - } - } - M <- which(MP & MQ, arr.ind = TRUE) - colnames(M) <- c("old", "new") - ## Checking for quality (map size and LOD Score) - submap.length.old <- sapply(cur.map$maps, function(x) sum(imf_h(x$seq.rf)))[M[,1]] - last.dist.old <- sapply(cur.map$maps, function(x) tail(imf_h(x$seq.rf),1))[M[,1]] - submap.length.new <- sapply(cur.map.temp$maps, function(x) sum(imf_h(x$seq.rf)))[M[,2]] - last.dist.new <- sapply(cur.map.temp$maps, function(x) tail(imf_h(x$seq.rf),1))[M[,2]] - last.mrk.expansion <- submap.expansion <- numeric(nrow(M)) - submap.expansion <- submap.length.new - submap.length.old - last.mrk.expansion <- last.dist.new - last.dist.old - - if(length(M[,2]) == 0) { - if(verbose) cat(crayon::italic$yellow(paste0(ct ,": not included (~map extension~)\n", sep = ""))) - ct <- ct + 1 - next() - } - - cur.map.temp$maps <- cur.map.temp$maps[M[,2]] - LOD <- get_LOD(cur.map.temp, sorted = FALSE) - if(sub.map.size.diff.limit != Inf){ - selected.map <- submap.expansion < sub.map.size.diff.limit & LOD < thres.hmm - if(detailed.verbose){ - x <- round(cbind(submap.length.new, submap.expansion, last.mrk.expansion),2) - for(j1 in 1:nrow(x)){ - cat(" ", x[j1,1], ": (", x[j1,2], "/", x[j1,3],")", sep = "") - if(j1 != nrow(x)) cat("\n") - } - if(all(!selected.map)) cat(paste0(crayon::red(cli::symbol$cross), "\n")) - else cat(paste0(crayon::green(cli::symbol$tick), "\n")) - } - if(all(!selected.map)){ - if(verbose) cat(crayon::italic$yellow(paste0(ct ,": not included (~map extension~)\n", sep = ""))) - ct <- ct + 1 - next() - } - } else { selected.map <- LOD < thres.hmm } - all.ph.temp <- update_ph_list_at_hmm_thres(cur.map.temp, Inf) - id <- which(selected.map & which(!sapply(cur.map.temp$maps, is.null))) - if(length(id) == 0){ - if(verbose) cat(crayon::italic$yellow(paste0(ct ,": not included (-linkage phases-)\n", sep = ""))) - ct <- ct + 1 - next() - } - cur.map.temp$maps <- cur.map.temp$maps[id] - if(any(unique(M[id,2,drop = FALSE]) > length(all.ph.temp$config.to.test))){ - if(verbose) cat(crayon::italic$yellow(paste0(ct ,": not included (-linkage phases-)\n", sep = ""))) - ct <- ct + 1 - next() - } - if(length(id) > nrow(M)) - all.ph.temp <- add_mrk_at_tail_ph_list(all.ph, all.ph.temp, M[,,drop = FALSE]) - else if(length(id) == nrow(M)){ - M[,2] <- id - all.ph.temp <- add_mrk_at_tail_ph_list(all.ph, all.ph.temp, M[,,drop = FALSE]) - } else { - all.ph.temp <- add_mrk_at_tail_ph_list(all.ph, all.ph.temp, M[id,,drop = FALSE]) - } - all.ph <- all.ph.temp - cur.map <- cur.map.temp - ct <- ct + 1 - } - ##### Re-estimating final map #### - ## Re-estimating final map with higher tolerance - seq.final <- make_seq_mappoly(input.obj = get(input.seq$data.name, pos = 1), - arg = as.numeric(names(all.ph$config.to.test[[1]]$P)), - data.name = input.seq$data.name) - #msg(paste("Done phasing", length(seq.final$seq.num), "markers")) - if (verbose){ - msg("Reestimating final recombination fractions", line = 2) - cat("") - } - final.map <- est_rf_hmm(input.seq = seq.final, - input.ph = all.ph, - twopt = twopt, - tol = tol.final, - verbose = FALSE, - reestimate.single.ph.configuration = TRUE, - high.prec = TRUE) - if(verbose) { - #cat("\n------------------------------------------") - cat("Markers in the initial sequence: ", length(input.seq$seq.num), sep = "") - cat("\nMapped markers : ", final.map$info$n.mrk, " (", - round(100*final.map$info$n.mrk/length(input.seq$seq.num),1) ,"%)\n", sep = "") - msg("", line = 2) - } - return(final.map) -} - -#' @rdname est_rf_hmm -#' @export -print.mappoly.map <- function(x, detailed = FALSE, ...) { - cat("This is an object of class 'mappoly.map'\n") - cat(" Ploidy level:\t", x$info$ploidy, "\n") - cat(" No. individuals:\t", get(x$info$data.name)$n.ind, "\n") - cat(" No. markers:\t", x$info$n.mrk, "\n") - cat(" No. linkage phases:\t", length(x$maps), "\n") - l <- sapply(x$maps, function(x) x$loglike) - LOD <- round(l - max(l), 2) - if(detailed) - { - for (j in 1:length(x$maps)) { - cat("\n ---------------------------------------------\n") - cat(" Linkage phase configuration: ", j) - cat("\n log-likelihood:\t", x$map[[j]]$loglike) - cat("\n LOD:\t\t", format(round(LOD[j], 2), digits = 2)) - cat("\n\n") - M <- matrix("|", x$info$n.mrk, x$info$ploidy * 2) - M <- cbind(M, " ") - M <- cbind(M, format(round(cumsum(c(0, imf_h(x$maps[[j]]$seq.rf))), 1), digits = 1)) - for (i in 1:x$info$n.mrk) { - if (all(x$maps[[j]]$seq.ph$Q[[i]] != 0)) - M[i, c(x$maps[[j]]$seq.ph$P[[i]], x$maps[[j]]$seq.ph$Q[[i]] + x$info$ploidy)] <- "o" else M[i, x$maps[[j]]$seq.ph$P[[i]]] <- "o" - } - M <- cbind(get(x$info$data.name)$mrk.names[x$maps[[j]]$seq.num], M) - big.name <- max(nchar(M[,1])) - format_name <- function(y, big.name){ - paste0(y, paste0(rep(" ", big.name-nchar(y)), collapse = "")) - } - M <- rbind(c("", letters[1:(ncol(M)-3)], "", ""), M) - format(apply(M, 1, function(y) cat(c("\t", format_name(y[1], big.name), "\t", y[2:(x$info$ploidy + 1)], rep(" ", 4), y[(x$info$ploidy + 2):(x$info$ploidy * 2 + 3)], "\n"), collapse = ""))) - } - } else { - cat("\n ---------------------------------------------\n") - cat(" Number of linkage phase configurations: ", length(x$maps)) - cat("\n ---------------------------------------------\n") - for (j in 1:length(x$maps)) { - cat(" Linkage phase configuration: ", j) - cat("\n map length:\t", format(round(sum(imf_h(x$map[[j]]$seq.rf)), 2))) - cat("\n log-likelihood:\t", format(round(x$map[[j]]$loglike, 2))) - cat("\n LOD:\t\t", format(round(LOD[j], 2), digits = 2)) - cat("\n ~~~~~~~~~~~~~~~~~~\n") - } - } -} - -#' @rdname est_rf_hmm -#' @importFrom grDevices rgb -#' @importFrom graphics rect -#' @export -plot.mappoly.map <- function(x, left.lim = 0, right.lim = Inf, - phase = TRUE, mrk.names = FALSE, - cex = 1, config = "best", P = "Parent 1", - Q = "Parent 2", xlim = NULL, ...) { - oldpar <- par(no.readonly = TRUE) - on.exit(par(oldpar)) - if(phase){ - map.info <- prepare_map(x, config) - if(any(map.info$ph.p == "B")){ - var.col <- c("black", "darkgray") - var.col <- var.col[1:2] - names(var.col) <- c("A", "B") - } else { - var.col <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3") - names(var.col) <- c("A", "T", "C", "G") - } - ploidy <- map.info$ploidy - if(ploidy == 2) { - d.col <- c(NA, "#1B9E77", "#D95F02") - }else if(ploidy == 4){ - d.col <- c(NA, "#1B9E77", "#D95F02", "#7570B3", "#E7298A") - }else if(ploidy == 6){ - d.col <- c(NA, "#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02") - }else if(ploidy == 8){ - d.col <- c(NA, "#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D", "#666666") - } else d.col <- c(NA, gg_color_hue(ploidy)) - names(d.col) <- 0:ploidy - d.col[1] <- NA - x <- map.info$map - lab <- names(x) - zy <- seq(0, 0.5, length.out = ploidy) + 1.5 - pp <- map.info$ph.p - pq <- map.info$ph.q - dp <- map.info$dp - dq <- map.info$dq - x1 <- abs(left.lim - x) - x2 <- abs(right.lim - x) - id.left <- which(x1 == min(x1))[1] - id.right <- rev(which(x2 == min(x2)))[1] - par(mai = c(1,0.15,0,0), mar = c(4.5,2,1,2)) - curx <- x[id.left:id.right] - layout(mat = matrix(c(4,2,3, 1), ncol = 2), heights = c(2, 10), widths = c(1, 10)) - if(is.null(xlim)) - xlim <- range(curx) - plot(x = curx, - y = rep(.5,length(curx)), - type = "n" , - ylim = c(.25, 4.5), - axes = FALSE, - xlab = "Distance (cM)", - ylab = "", - xlim = xlim) - lines(c(x[id.left], x[id.right]), c(.5, .5), lwd = 15, col = "gray") - points(x = curx, - y = rep(.5,length(curx)), - xlab = "", ylab = "", - pch = "|", cex = 1.5, - ylim = c(0,2)) - axis(side = 1) - diplocol1 <- c("#4DAF4A", "#984EA3") - diplocol2 <- c("#E41A1C", "#377EB8") - #Parent 2 - x1 <- seq(x[id.left], x[id.right], length.out = length(curx)) - x.control <- diff(x1[1:2])/2 - if(length(x1) < 150) - x.control <- x.control * .8 - if(length(x1) < 100) - x.control <- x.control * .8 - if(length(x1) < 75) - x.control <- x.control * .8 - if(length(x1) < 50) - x.control <- x.control * .8 - if(length(x1) < 25) - x.control <- x.control * .8 - for(i in 1:ploidy) - { - if(ploidy == 2 && any(map.info$ph.p == "B")){ - lines(range(x1), c(zy[i], zy[i]), lwd = 15, col = diplocol1[i]) - } else { - lines(range(x1), c(zy[i], zy[i]), lwd = 8, col = "gray") - } - y1 <- rep(zy[i], length(curx)) - pal <- var.col[pq[id.left:id.right,i]] - rect(xleft = x1 - x.control, ybottom = y1 -.05, xright = x1 + x.control, ytop = y1 +.05, col = pal, border = NA) - } - #connecting allelic variants to markers - for(i in 1:length(x1)) - lines(c(curx[i], x1[i]), c(0.575, zy[1]-.05), lwd = 0.2) - points(x = x1, - y = zy[ploidy]+0.05+dq[id.left:id.right]/20, - col = d.col[as.character(dq[id.left:id.right])], - pch = 19, cex = .7) - #Parent 1 - zy <- zy + 1.1 - for(i in 1:ploidy) - { - if(ploidy == 2 && any(map.info$ph.p == "B")){ - lines(range(x1), c(zy[i], zy[i]), lwd = 12, col = diplocol2[i]) - } else { - lines(range(x1), c(zy[i], zy[i]), lwd = 8, col = "gray") - } - y1 <- rep(zy[i], length(curx)) - pal <- var.col[pp[id.left:id.right,i]] - rect(xleft = x1 - x.control, ybottom = y1 -.05, xright = x1 + x.control, ytop = y1 +.05, col = pal, border = NA) - } - points(x = x1, - y = zy[ploidy]+0.05+dp[id.left:id.right]/20, - col = d.col[as.character(dp[id.left:id.right])], - pch = 19, cex = .7) - if(mrk.names) - text(x = x1, - y = rep(zy[ploidy]+0.05+.3, length(curx)), - labels = names(curx), - srt = 90, adj = 0, cex = cex) - par(mar = c(4.5,1,1,0), xpd = TRUE) - plot(x = 0, - y = 0, - type = "n" , - axes = FALSE, - ylab = "", - xlab = "", - ylim = c(.25, 4.5)) - zy <- zy - 1.1 - mtext(text = Q, side = 4, at = mean(zy), line = -1, font = 4) - for(i in 1:ploidy) - mtext(letters[(ploidy+1):(2*ploidy)][i], line = 0, at = zy[i], side = 4) - zy <- zy + 1.1 - mtext(text = P, side = 4, at = mean(zy), line = -1, font = 4) - for(i in 1:ploidy) - mtext(letters[1:ploidy][i], line = 0, at = zy[i], side = 4) - par(mar = c(0,1,2,4), xpd = FALSE) - plot(x = curx, - y = rep(.5,length(curx)), - type = "n" , - axes = FALSE, - xlab = "", - ylab = "") - if(any(map.info$ph.p == "B")){ - legend("bottomleft", legend = c("A", "B"), - fill = c(var.col), title = "Variants", - box.lty = 0, bg = "transparent", ncol = 2) - } else { - legend("bottomleft", legend = c("A", "T", "C", "G", "-"), - fill = c(var.col, "white"), title = "Nucleotides", - box.lty = 0, bg = "transparent", ncol = 4) - } - legend("bottomright", legend = names(d.col)[-1], title = "Doses" , - col = d.col[-1], ncol = ploidy/2, pch = 19, - box.lty = 0) - } else { - plot_map_list(x) - } -} - -#' prepare maps for plot -#' @param void internal function to be documented -#' @keywords internal -prepare_map <- function(input.map, config = "best"){ - if (!inherits(input.map, "mappoly.map")) { - stop(deparse(substitute(input.map)), " is not an object of class 'mappoly.map'") - } - ## Choosing the linkage phase configuration - LOD.conf <- get_LOD(input.map, sorted = FALSE) - if(config == "best") { - i.lpc <- which.min(LOD.conf) - } else if(config == "all"){ - i.lpc <- seq_along(LOD.conf) } else if (config > length(LOD.conf)) { - stop("invalid linkage phase configuration") - } else i.lpc <- config - ## Gathering marker positions - map <- cumsum(imf_h(c(0, input.map$maps[[i.lpc]]$seq.rf))) - names(map) <- input.map$info$mrk.names - ## - ph.p <- ph_list_to_matrix(input.map$maps[[i.lpc]]$seq.ph$P, input.map$info$ploidy) - ph.q <- ph_list_to_matrix(input.map$maps[[i.lpc]]$seq.ph$Q, input.map$info$ploidy) - dimnames(ph.p) <- list(names(map), letters[1:input.map$info$ploidy]) - dimnames(ph.q) <- list(names(map), letters[(1+input.map$info$ploidy):(2*input.map$info$ploidy)]) - if(is.null(input.map$info$seq.alt)) - { - ph.p[ph.p == 1] <- ph.q[ph.q == 1] <- "A" - ph.p[ph.p == 0] <- ph.q[ph.q == 0] <- "B" - } else { - for(i in input.map$info$mrk.names){ - ph.p[i, ph.p[i,] == 1] <- input.map$info$seq.alt[i] - ph.p[i, ph.p[i,] == 0] <- input.map$info$seq.ref[i] - ph.q[i, ph.q[i,] == 1] <- input.map$info$seq.alt[i] - ph.q[i, ph.q[i,] == 0] <- input.map$info$seq.ref[i] - } - } - dp <- input.map$info$seq.dose.p1 - dq <- input.map$info$seq.dose.p2 - list(ploidy = input.map$info$ploidy, map = map, ph.p = ph.p, ph.q = ph.q, dp = dp, dq = dq) -} - -#' Get the tail of a marker sequence up to the point where the markers -#' provide no additional information. -#' -#' @param void internal function to be documented -#' @keywords internal -get_full_info_tail <- function(input.obj, extend = NULL) { - ## checking for correct object - if(!inherits(input.obj, "mappoly.map")) - stop(deparse(substitute(input.obj)), " is not an object of class 'mappoly.map''") - if (!is.null(extend)) - if (extend > input.obj$info$n.mrk) - return(input.obj) - ploidy <- input.obj$info$ploidy - i <- ploidy/2 - w1 <- ph_list_to_matrix(input.obj$maps[[1]]$seq.ph$P, ploidy) - max1 <- length(unique(apply(w1, 2, paste0, collapse = ""))) - w2 <- ph_list_to_matrix(input.obj$maps[[1]]$seq.ph$Q, ploidy) - max2 <- length(unique(apply(w2, 2, paste0, collapse = ""))) - while (i < input.obj$info$n.mrk) { - wp <- ph_list_to_matrix(tail(input.obj$maps[[1]]$seq.ph$P, i), ploidy) - xp <- apply(wp, 2, paste, collapse = "-") - wq <- ph_list_to_matrix(tail(input.obj$maps[[1]]$seq.ph$Q, i), ploidy) - xq <- apply(wq, 2, paste, collapse = "-") - if (length(unique(xp)) == max1 && length(unique(xq)) == max2) - break() - i <- i + 1 - } - if (!is.null(extend)) - if (i < extend) - i <- extend - input.obj$info$n.mrk <- i - - - - input.obj$info$seq.num <- tail(input.obj$info$seq.num, i) - input.obj$info$mrk.names <- tail(input.obj$info$mrk.names, i) - input.obj$info$seq.dose.p1 <- tail(input.obj$info$seq.dose.p1, i) - input.obj$info$seq.dose.p2 <- tail(input.obj$info$seq.dose.p2, i) - input.obj$info$chrom <- tail(input.obj$info$chrom, i) - input.obj$info$genome.pos <- tail(input.obj$info$genome.pos, i) - input.obj$info$chisq.pval <- tail(input.obj$info$chisq.pval, i) - - for (j in 1:length(input.obj$maps)) { - input.obj$maps[[j]]$loglike <- 0 - input.obj$maps[[j]]$seq.ph$P <- tail(input.obj$maps[[j]]$seq.ph$P, n = i) - input.obj$maps[[j]]$seq.ph$Q <- tail(input.obj$maps[[j]]$seq.ph$Q, n = i) - input.obj$maps[[j]]$seq.num <- tail(input.obj$maps[[j]]$seq.num, n = i) - input.obj$maps[[j]]$seq.rf <- tail(input.obj$maps[[j]]$seq.rf, n = (i - 1)) - } - return(input.obj) -} - -#' remove maps under a certain threshold -#' @param void internal function to be documented -#' @keywords internal -#' @export filter_map_at_hmm_thres -filter_map_at_hmm_thres <- function(map, thres.hmm){ - map$info$ph.thresh <- NULL - map$maps <- map$maps[get_LOD(map, sorted = FALSE) < thres.hmm] - map -} - -#' makes a phase list from map, selecting only -#' configurations under a certain threshold -#' @param void internal function to be documented -#' @keywords internal -update_ph_list_at_hmm_thres <- function(map, thres.hmm){ - temp.map <- filter_map_at_hmm_thres(map, thres.hmm) - config.to.test <- lapply(temp.map$maps, function(x) x$seq.ph) - structure(list(config.to.test = config.to.test, - ploidy = map$info$ploidy, seq.num = map$maps[[1]]$seq.num, - thres = map$info$ph.thresh, data.name = map$info$data.name, - thres.hmm = thres.hmm), - class = "two.pts.linkage.phases") -} - -#' subset of a linkage phase list -#' @param void internal function to be documented -#' @keywords internal -get_ph_list_subset <- function(ph.list, seq.num, conf){ - config.to.test <- list(lapply(ph.list$config.to.test[[conf]], function(x, seq.num) x[as.character(seq.num)], seq.num)) - structure(list(config.to.test = config.to.test, - ploidy = ph.list$ploidy, seq.num = ph.list$seq.num, - thres = ph.list$ph.thresh, data.name = ph.list$data.name, - thres.hmm = ph.list$thres.hmm), - class = "two.pts.linkage.phases") -} - -#' concatenate two linkage phase lists -#' @param void internal function to be documented -#' @keywords internal -concatenate_ph_list <- function(ph.list.1, ph.list.2){ - if(length(ph.list.1) == 0) - return(ph.list.2) - config.to.test <- c(ph.list.1$config.to.test, ph.list.2$config.to.test) - id <- which(!duplicated(config.to.test)) - config.to.test <- config.to.test[id] - structure(list(config.to.test = config.to.test, - ploidy = ph.list.1$ploidy, seq.num = ph.list.1$seq.num, - thres = ph.list.1$ph.thresh, data.name = ph.list.1$data.name, - thres.hmm = ph.list.1$thres.hmm), - class = "two.pts.linkage.phases") -} - -#' add a single marker at the tail of a linkage phase list -#' @param void internal function -#' @keywords internal -add_mrk_at_tail_ph_list <- function(ph.list.1, ph.list.2, cor.index){ - config.to.test <- vector("list", length = nrow(cor.index)) - for(i in 1:nrow(cor.index)){ - config.to.test[[i]] <- list(P = c(ph.list.1$config.to.test[[cor.index[i,1]]]$P, tail(ph.list.2$config.to.test[[cor.index[i,2]]]$P,1)), - Q = c(ph.list.1$config.to.test[[cor.index[i,1]]]$Q, tail(ph.list.2$config.to.test[[cor.index[i,2]]]$Q,1))) - } - structure(list(config.to.test = config.to.test, - ploidy = ph.list.1$ploidy, seq.num = ph.list.1$seq.num, - thres = ph.list.1$ph.thresh, data.name = ph.list.1$data.name, - thres.hmm = ph.list.1$thres.hmm), - class = "two.pts.linkage.phases") -} - -#' Compare a list of linkage phases and return the -#' markers for which they are different. -#' @param void internal function to be documented -#' @keywords internal -check_ls_phase <- function(ph){ - if(length(ph$config.to.test) == 1) return(0) - id <- rep(1, length(ph$config.to.test[[1]]$P)) - for(i in 1:length(ph$config.to.test[[1]]$P)){ - w <- ph$config.to.test[[1]]$P[i] - for(j in 2:length(ph$config.to.test)) - id[i] <- id[i] + identical(w, ph$config.to.test[[j]]$P[i]) - } - names(id) <- names((ph$config.to.test[[1]]$P)) - w <- length(ph$config.to.test[[1]]$P) - which(id < length(ph$config.to.test)) - if(length(w) == 0) return(0) - w -} - -#' cat for graphical representation of the phases -#' @param void internal function to be documented -#' @keywords internal -print_ph <- function(input.ph){ - phs.P <- lapply(input.ph$config.to.test, - function(x, ploidy) { - M <- matrix("|", nrow = 1, ncol = ploidy) - M[unlist(tail(x$P, 1))] <- crayon::red(cli::symbol$bullet) - paste(M, collapse = "")}, - ploidy = input.ph$ploidy) - phs.Q <- lapply(input.ph$config.to.test, - function(x, ploidy) { - M <- matrix("|", nrow = 1, ncol = ploidy) - M[unlist(tail(x$Q, 1))] <- crayon::cyan(cli::symbol$bullet) - paste(M, collapse = "")}, - ploidy = input.ph$ploidy) - if(length(phs.P) == 1) - return(paste(unlist(phs.P)[1], unlist(phs.Q)[1], " ")) - if(length(phs.P) == 2) - return(paste(unlist(phs.P)[1], unlist(phs.Q)[1], " ", unlist(phs.P)[2], unlist(phs.Q)[2])) - if(length(phs.P) > 2) - return(paste(unlist(phs.P)[1], unlist(phs.Q)[1], " ... ", unlist(phs.P)[2], unlist(phs.Q)[2])) -} - -#' cat for phase information -#' @param void internal function to be documented -#' @keywords internal -cat_phase <- function(input.seq, - input.ph, - all.ph, - ct, - seq.num, - twopt.phase.number, - hmm.phase.number){ - pc <- round(100*ct/length(input.seq$seq.num),1) - xmax11 <- nchar(length(input.seq$seq.num)) - x11 <- ct - x11 <- paste0(x11, paste(rep(" ", xmax11-nchar(x11)), collapse = "")) - x12 <- length(all.ph$config.to.test[[1]]$P) + 1 - x12 <- paste0(x12, paste(rep(" ", xmax11-nchar(x12)), collapse = "")) - x13 <- pc - x13 <- paste0(":(",pc,"%)", paste(rep(" ", 5-nchar(x13)), collapse = "")) - x1 <- paste0(x12,"/",x11, x13) - xmax21 <- nchar(max(input.seq$seq.num)) - x21 <- input.seq$seq.num[ct] - x21 <- paste0(x21, paste(rep(" ", xmax21-nchar(x21)), collapse = "")) - x22 <- length(input.ph$config.to.test) - x22 <- paste0(x22, " ph ", paste(rep(" ", 5-nchar(x22)), collapse = "")) - x23 <- paste0("(", hmm.phase.number, "/", twopt.phase.number,") ") - x23 <- paste0(x23, paste(rep(" ", 10-nchar(x23)), collapse = "")) - x2 <- paste0( x21, ": ", x22, x23) - x31 <- length(seq.num)-1 - x31 <- paste0(x31, paste(rep(" ", xmax11-nchar(x31)), collapse = "")) - x3 <- paste0(" -- tail: ", x31) - cat(x1, x2, x3, print_ph(input.ph), "\n") -} +#' Multipoint analysis using Hidden Markov Models in autopolyploids +#' +#' Performs the multipoint analysis proposed by \cite{Mollinari and +#' Garcia (2019)} in a sequence of markers +#' +#' This function first enumerates a set of linkage phase configurations +#' based on two-point recombination fraction information using a threshold +#' provided by the user (argument \code{thresh}). After that, for each +#' configuration, it reconstructs the genetic map using the +#' HMM approach described in Mollinari and Garcia (2019). As result, it returns +#' the multipoint likelihood for each configuration in form of LOD Score comparing +#' each configuration to the most likely one. It is recommended to use a small number +#' of markers (e.g. 50 markers for hexaploids) since the possible linkage +#' phase combinations bounded only by the two-point information can be huge. +#' Also, it can be quite sensible to small changes in \code{'thresh'}. +#' For a large number of markers, please see \code{\link[mappoly]{est_rf_hmm_sequential}}. +# +#' @param input.seq an object of class \code{mappoly.sequence} +#' +#' @param input.ph an object of class \code{two.pts.linkage.phases}. +#' If not available (default = NULL), it will be computed +#' +#' @param thres LOD Score threshold used to determine if the linkage phases +#' compared via two-point analysis should be considered. Smaller +#' values will result in smaller number of linkage phase +#' configurations to be evaluated by the multipoint algorithm. +#' +#' @param twopt an object of class \code{mappoly.twopt} +#' containing two-point information +#' +#' @param verbose if \code{TRUE}, current progress is shown; if +#' \code{FALSE} (default), no output is produced +#' +#' @param tol the desired accuracy (default = 1e-04) +#' +#' @param est.given.0.rf logical. If TRUE returns a map forcing all +#' recombination fractions equals to 0 (1e-5, for internal use only. +#' Default = FALSE) +#' +#' @param reestimate.single.ph.configuration logical. If \code{TRUE} +#' returns a map without re-estimating the map parameters for cases +#' where there is only one possible linkage phase configuration. +#' This argument is intended to be used in a sequential map construction +#' +#' @param high.prec logical. If \code{TRUE} (default) uses high precision +#' long double numbers in the HMM procedure +#' +#' @param x an object of the class \code{mappoly.map} +#' +#' @param detailed logical. if TRUE, prints the linkage phase configuration and the marker +#' position for all maps. If FALSE (default), prints a map summary +#' +#' @param left.lim the left limit of the plot (in cM, default = 0). +#' +#' @param right.lim the right limit of the plot (in cM, default = Inf, i.e., +#' will print the entire map) +#' +#' @param phase logical. If \code{TRUE} (default) plots the phase configuration +#' for both parents +#' +#' @param mrk.names if TRUE, marker names are displayed (default = FALSE) +#' +#' @param cex The magnification to be used for marker names +#' +#' @param config should be \code{'best'} or the position of the +#' configuration to be plotted. If \code{'best'}, plot the configuration +#' with the highest likelihood +#' +#' @param P a string containing the name of parent P +#' +#' @param Q a string containing the name of parent Q +#' +#' @param xlim range of the x-axis. If \code{xlim = NULL} (default) it uses the +#' map range. +#' +#' @param ... currently ignored +#' +#' @return A list of class \code{mappoly.map} with two elements: +#' +#' i) info: a list containing information about the map, regardless of the linkage phase configuration: +#' \item{ploidy}{the ploidy level} +#' \item{n.mrk}{number of markers} +#' \item{seq.num}{a vector containing the (ordered) indices of markers in the map, +#' according to the input file} +#' \item{mrk.names}{the names of markers in the map} +#' \item{seq.dose.p1}{a vector containing the dosage in parent 1 for all markers in the map} +#' \item{seq.dose.p2}{a vector containing the dosage in parent 2 for all markers in the map} +#' \item{chrom}{a vector indicating the sequence (usually chromosome) each marker belongs +#' as informed in the input file. If not available, +#' \code{chrom = NULL}} +#' \item{genome.pos}{physical position (usually in megabase) of the markers into the sequence} +#' \item{seq.ref}{reference base used for each marker (i.e. A, T, C, G). If not available, +#' \code{seq.ref = NULL}} +#' \item{seq.alt}{alternative base used for each marker (i.e. A, T, C, G). If not available, +#' \code{seq.ref = NULL}} +#' \item{chisq.pval}{a vector containing p-values of the chi-squared test of Mendelian +#' segregation for all markers in the map} +#' \item{data.name}{name of the dataset of class \code{mappoly.data}} +#' \item{ph.thres}{the LOD threshold used to define the linkage phase configurations to test} +#' +#' ii) a list of maps with possible linkage phase configuration. Each map in the list is also a +#' list containing +#' \item{seq.num}{a vector containing the (ordered) indices of markers in the map, +#' according to the input file} +#' \item{seq.rf}{a vector of size (\code{n.mrk - 1}) containing a sequence of recombination +#' fraction between the adjacent markers in the map} +#' \item{seq.ph}{linkage phase configuration for all markers in both parents} +#' \item{loglike}{the hmm-based multipoint likelihood} +#' +#' @examples +#' mrk.subset <- make_seq_mappoly(hexafake, 1:10) +#' red.mrk <- elim_redundant(mrk.subset) +#' unique.mrks <- make_seq_mappoly(red.mrk) +#' subset.pairs <- est_pairwise_rf(input.seq = unique.mrks, +#' ncpus = 1, +#' verbose = TRUE) +#' +#' ## Estimating subset map with a low tolerance for the E.M. procedure +#' ## for CRAN testing purposes +#' subset.map <- est_rf_hmm(input.seq = unique.mrks, +#' thres = 2, +#' twopt = subset.pairs, +#' verbose = TRUE, +#' tol = 0.1, +#' est.given.0.rf = FALSE) +#' subset.map +#' ## linkage phase configuration with highest likelihood +#' plot(subset.map, mrk.names = TRUE, config = "best") +#' ## the second one +#' plot(subset.map, mrk.names = TRUE, config = 2) +#' +#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} +#' +#' @references +#' Mollinari, M., and Garcia, A. A. F. (2019) Linkage +#' analysis and haplotype phasing in experimental autopolyploid +#' populations with high ploidy level using hidden Markov +#' models, _G3: Genes, Genomes, Genetics_. +#' https://doi.org/10.1534/g3.119.400378 +#' @rdname est_rf_hmm +#' @export est_rf_hmm +est_rf_hmm <- function(input.seq, + input.ph = NULL, + thres = 0.5, + twopt = NULL, + verbose = FALSE, + tol = 1e-04, + est.given.0.rf = FALSE, + reestimate.single.ph.configuration = TRUE, + high.prec = TRUE) { + + ## checking for correct object + input_classes <- c("mappoly.sequence", "two.pts.linkage.phases") + if (!inherits(input.seq, input_classes[1])) { + stop(deparse(substitute(input.seq)), " is not an object of class 'mappoly.sequence'") + } + if(length(input.seq$seq.num) == 1) + stop("Input sequence contains only one marker.", call. = FALSE) + if (is.null(input.ph)) { + if (is.null(twopt)) + stop("Two-point analysis not found!") + if (verbose) + cat("\nListing all configurations under threshold", thres, "using two-point information...\n") + input.ph <- ls_linkage_phases(input.seq = input.seq, thres = thres, twopt = twopt) + } + if (!inherits(input.ph, input_classes[2])) { + stop(deparse(substitute(input.ph)), " is not an object of class 'two.pts.linkage.phases'") + } + info.par <-detect_info_par(input.seq) + if(length(input.seq$seq.num) == 2 & !est.given.0.rf){ + maps <- vector("list", 1) + res.temp <- twopt$pairwise[[paste(sort(input.seq$seq.num), collapse = "-")]] + dp <- get(input.seq$data.name, pos = 1)$dosage.p1[input.seq$seq.num] + dq <- get(input.seq$data.name, pos = 1)$dosage.p2[input.seq$seq.num] + sh <- as.numeric(unlist(strsplit(rownames(res.temp)[1], split = "-"))) + res.ph <- list(P = c(list(0),list(0)), Q = c(list(0),list(0))) + if(dp[1] != 0) + res.ph$P[[1]] <- 1:dp[1] + if(dq[1] != 0) + res.ph$Q[[1]] <- 1:dq[1] + if(dp[2] != 0) + { + v <- (rev(res.ph$P[[1]])[1]+1):(dp[2]+rev(res.ph$P[[1]])[1]) + res.ph$P[[2]] <- v-sh[1] + } + if(dq[2] != 0) + { + v <- (rev(res.ph$Q[[1]])[1]+1):(dq[2]+rev(res.ph$Q[[1]])[1]) + res.ph$Q[[2]] <- v-sh[2] + } + names(res.ph$P) <- names(res.ph$Q) <- input.seq$seq.num + maps[[1]] <- list(seq.num = input.seq$seq.num, + seq.rf = res.temp[1,"rf"], + seq.ph = res.ph, + loglike = 0) + return(structure(list(info = list(ploidy = input.seq$ploidy, + n.mrk = length(input.seq$seq.num), + seq.num = input.seq$seq.num, + mrk.names = input.seq$seq.mrk.names, + seq.dose.p1 = get(input.seq$data.name, pos = 1)$dosage.p1[input.seq$seq.mrk.names], + seq.dose.p2 = get(input.seq$data.name, pos = 1)$dosage.p2[input.seq$seq.mrk.names], + chrom = get(input.seq$data.name, pos = 1)$chrom[input.seq$seq.mrk.names], + genome.pos = get(input.seq$data.name, pos = 1)$genome.pos[input.seq$seq.mrk.names], + seq.ref = get(input.seq$data.name, pos = 1)$seq.ref[input.seq$seq.mrk.names], + seq.alt = get(input.seq$data.name, pos = 1)$seq.alt[input.seq$seq.mrk.names], + chisq.pval = get(input.seq$data.name, pos = 1)$chisq.pval[input.seq$seq.mrk.names], + data.name = input.seq$data.name, + ph.thresh = abs(res.temp[2,1])), + maps = maps), + class = "mappoly.map")) + } + n.ph <- length(input.ph$config.to.test) + ret.map.no.rf.estimation <- FALSE + if(n.ph == 1 && !reestimate.single.ph.configuration) + ret.map.no.rf.estimation <- TRUE + maps <- vector("list", n.ph) + if (verbose){ + txt <- paste0(" ",n.ph, " phase(s): ") + cat(txt) + } + for (i in 1:n.ph) { + if (verbose) { + if((i+1)%%40 == 0) + cat("\n", paste0(rep(" ", nchar(txt)), collapse = ""), sep = "") + cat(". ") + } + if(est.given.0.rf){ + rf.temp <- rep(1e-5, length(input.seq$seq.num) - 1) + tol = 1 + } + else{ + rf.temp <- rep(0.001, length(input.seq$seq.num) - 1) + } + + ph <- list(P = input.ph$config.to.test[[i]]$P, + Q = input.ph$config.to.test[[i]]$Q) + + if(info.par == "both") + { + maps[[i]] <- est_rf_hmm_single_phase(input.seq = input.seq, + input.ph.single = ph, + rf.temp = rf.temp, + tol = tol, + verbose = FALSE, + ret.map.no.rf.estimation = ret.map.no.rf.estimation, + high.prec = high.prec) + } + else if (info.par == "p1") + { + maps[[i]] <- est_rf_hmm_single_phase_single_parent(input.seq, + input.ph.single = ph, + info.parent = 1, + uninfo.parent = 2, + rf.vec = rf.temp, + tol = tol, + verbose = FALSE, + ret.map.no.rf.estimation = ret.map.no.rf.estimation) + } + else if (info.par == "p2") + { + maps[[i]] <- est_rf_hmm_single_phase_single_parent(input.seq, + input.ph.single = ph, + info.parent = 2, + uninfo.parent = 1, + rf.vec = rf.temp, + tol = tol, + verbose = FALSE, + ret.map.no.rf.estimation = ret.map.no.rf.estimation) + + } + else stop("Should not get here.") + } + #cat("\n") + id <- order(sapply(maps, function(x) x$loglike), decreasing = TRUE) + maps <- maps[id] + if (verbose) + cat("\n") + structure(list(info = list(ploidy = input.seq$ploidy, + n.mrk = length(input.seq$seq.num), + seq.num = input.seq$seq.num, + mrk.names = input.seq$seq.mrk.names, + seq.dose.p1 = get(input.seq$data.name, pos = 1)$dosage.p1[input.seq$seq.mrk.names], + seq.dose.p2 = get(input.seq$data.name, pos = 1)$dosage.p2[input.seq$seq.mrk.names], + chrom = get(input.seq$data.name, pos = 1)$chrom[input.seq$seq.mrk.names], + genome.pos = get(input.seq$data.name, pos = 1)$genome.pos[input.seq$seq.mrk.names], + seq.ref = get(input.seq$data.name, pos = 1)$seq.ref[input.seq$seq.mrk.names], + seq.alt = get(input.seq$data.name, pos = 1)$seq.alt[input.seq$seq.mrk.names], + chisq.pval = get(input.seq$data.name, pos = 1)$chisq.pval[input.seq$seq.mrk.names], + data.name = input.seq$data.name, ph.thresh = input.ph$thres), + maps = maps), + class = "mappoly.map") +} + +#' Multipoint analysis using Hidden Markov Models: Sequential phase elimination +#' +#' Performs the multipoint analysis proposed by \cite{Mollinari and +#' Garcia (2019)} in a sequence of markers removing unlikely phases +#' using sequential multipoint information. +#' +#' This function sequentially includes markers into a map given an +#' ordered sequence. It uses two-point information to eliminate +#' unlikely linkage phase configurations given \code{thres.twopt}. The +#' search is made within a window of size \code{extend.tail}. For the +#' remaining configurations, the HMM-based likelihood is computed and +#' the ones that pass the HMM threshold (\code{thres.hmm}) are eliminated. +#' +#' @param input.seq an object of class \code{mappoly.sequence} +#' +#' @param twopt an object of class \code{mappoly.twopt} +#' containing the two-point information +#' +#' @param start.set number of markers to start the phasing procedure (default = 4) +#' +#' @param thres.twopt the LOD threshold used to determine if the linkage +#' phases compared via two-point analysis should be considered +#' for the search space reduction (A.K.A. \eqn{\eta} in +#' \cite{Mollinari and Garcia (2019)}, default = 5) +#' +#' @param thres.hmm the LOD threshold used to determine if the linkage +#' phases compared via hmm analysis should be evaluated in the +#' next round of marker inclusion (default = 50) +#' +#' @param extend.tail the length of the chain's tail that should +#' be used to calculate the likelihood of the map. If \code{NULL} (default), +#' the function uses all markers positioned. Even if \code{info.tail = TRUE}, +#' it uses at least \code{extend.tail} as the tail length +#' +#' @param phase.number.limit the maximum number of linkage phases of the sub-maps defined +#' by arguments \code{info.tail} and \code{extend.tail}. Default is 20. If the +#' size exceeds this limit, the marker will not be inserted. If +#' \code{Inf}, then it will insert all markers. +#' +#' @param sub.map.size.diff.limit the maximum accepted length +#' difference between the current and the previous sub-map defined +#' by arguments \code{info.tail} and \code{extend.tail}. If the +#' size exceeds this limit, the marker will not be inserted. If +#' \code{NULL}(default), then it will insert all markers. +#' +#' @param info.tail if \code{TRUE} (default), it uses the complete informative tail +#' of the chain (i.e. number of markers where all homologous +#' (\eqn{ploidy x 2}) can be distinguished) to calculate the map likelihood +#' +#'@param reestimate.single.ph.configuration logical. If \code{FALSE} (default) +#' returns a map without re-estimating the map parameters in cases +#' where there are only one possible linkage phase configuration +#' +#' @param tol the desired accuracy during the sequential phase (default = 10e-02) +#' +#' @param tol.final the desired accuracy for the final map (default = 10e-04) +#' +#' @param verbose If \code{TRUE} (default), current progress is shown; if +#' \code{FALSE}, no output is produced +#' +#' @param detailed.verbose If \code{TRUE}, the expansion of the current +#' submap is shown; +#' +#' @param high.prec logical. If \code{TRUE} uses high precision +#' (long double) numbers in the HMM procedure implemented in C++, +#' which can take a long time to perform (default = FALSE) +#' +#' @return A list of class \code{mappoly.map} with two elements: +#' +#' i) info: a list containing information about the map, regardless of the linkage phase configuration: +#' \item{ploidy}{the ploidy level} +#' \item{n.mrk}{number of markers} +#' \item{seq.num}{a vector containing the (ordered) indices of markers in the map, +#' according to the input file} +#' \item{mrk.names}{the names of markers in the map} +#' \item{seq.dose.p1}{a vector containing the dosage in parent 1 for all markers in the map} +#' \item{seq.dose.p2}{a vector containing the dosage in parent 2 for all markers in the map} +#' \item{chrom}{a vector indicating the sequence (usually chromosome) each marker belongs +#' as informed in the input file. If not available, +#' \code{chrom = NULL}} +#' \item{genome.pos}{physical position (usually in megabase) of the markers into the sequence} +#' \item{seq.ref}{reference base used for each marker (i.e. A, T, C, G). If not available, +#' \code{seq.ref = NULL}} +#' \item{seq.alt}{alternative base used for each marker (i.e. A, T, C, G). If not available, +#' \code{seq.ref = NULL}} +#' \item{chisq.pval}{a vector containing p-values of the chi-squared test of Mendelian +#' segregation for all markers in the map} +#' \item{data.name}{name of the dataset of class \code{mappoly.data}} +#' \item{ph.thres}{the LOD threshold used to define the linkage phase configurations to test} +#' +#' ii) a list of maps with possible linkage phase configuration. Each map in the list is also a +#' list containing +#' \item{seq.num}{a vector containing the (ordered) indices of markers in the map, +#' according to the input file} +#' \item{seq.rf}{a vector of size (\code{n.mrk - 1}) containing a sequence of recombination +#' fraction between the adjacent markers in the map} +#' \item{seq.ph}{linkage phase configuration for all markers in both parents} +#' \item{loglike}{the hmm-based multipoint likelihood} +#' +#' @examples +#' \donttest{ +#' mrk.subset <- make_seq_mappoly(hexafake, 1:20) +#' red.mrk <- elim_redundant(mrk.subset) +#' unique.mrks <- make_seq_mappoly(red.mrk) +#' subset.pairs <- est_pairwise_rf(input.seq = unique.mrks, +#' ncpus = 1, +#' verbose = TRUE) +#' subset.map <- est_rf_hmm_sequential(input.seq = unique.mrks, +#' thres.twopt = 5, +#' thres.hmm = 10, +#' extend.tail = 10, +#' tol = 0.1, +#' tol.final = 10e-3, +#' phase.number.limit = 5, +#' twopt = subset.pairs, +#' verbose = TRUE) +#' print(subset.map, detailed = TRUE) +#' plot(subset.map) +#' plot(subset.map, left.lim = 0, right.lim = 1, mrk.names = TRUE) +#' plot(subset.map, phase = FALSE) +#' +#' ## Retrieving simulated linkage phase +#' ph.P <- maps.hexafake[[1]]$maps[[1]]$seq.ph$P +#' ph.Q <- maps.hexafake[[1]]$maps[[1]]$seq.ph$Q +#' ## Estimated linkage phase +#' ph.P.est <- subset.map$maps[[1]]$seq.ph$P +#' ph.Q.est <- subset.map$maps[[1]]$seq.ph$Q +#' compare_haplotypes(ploidy = 6, h1 = ph.P[names(ph.P.est)], h2 = ph.P.est) +#' compare_haplotypes(ploidy = 6, h1 = ph.Q[names(ph.Q.est)], h2 = ph.Q.est) +#' } +#' +#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} +#' +#' @references +#' Mollinari, M., and Garcia, A. A. F. (2019) Linkage +#' analysis and haplotype phasing in experimental autopolyploid +#' populations with high ploidy level using hidden Markov +#' models, _G3: Genes, Genomes, Genetics_. +#' \doi{10.1534/g3.119.400378} +#' +#' @importFrom utils head +#' @importFrom cli rule +#' @export est_rf_hmm_sequential +est_rf_hmm_sequential <- function(input.seq, + twopt, + start.set = 4, + thres.twopt = 5, + thres.hmm = 50, + extend.tail = NULL, + phase.number.limit = 20, + sub.map.size.diff.limit = Inf, + info.tail = TRUE, + reestimate.single.ph.configuration = FALSE, + tol = 10e-2, + tol.final = 10e-4, + verbose = TRUE, + detailed.verbose = FALSE, + high.prec = FALSE) +{## checking for correct object + input_classes <- c("mappoly.sequence", "mappoly.twopt") + if (!inherits(input.seq, input_classes[1])) { + stop(deparse(substitute(input.seq)), + " is not an object of class 'mappoly.sequence'") + } + if (!inherits(twopt, input_classes[2])) { + stop(deparse(substitute(twopt)), + " is not an object of class 'mappoly.twopt'") + } + ## Information about the map + if(verbose) { + cli::cat_line("Number of markers: ", length(input.seq$seq.num)) + msg("Initial sequence", line = 2) + } + if(is.null(extend.tail)) + extend.tail <- length(input.seq$seq.num) + ##### Map in case of two markers ##### + if(length(input.seq$seq.num) == 2) + { + cur.map <- est_rf_hmm(input.seq = input.seq, thres = thres.twopt, + twopt = twopt, tol = tol.final, high.prec = high.prec, + verbose = verbose, + reestimate.single.ph.configuration = reestimate.single.ph.configuration) + return(filter_map_at_hmm_thres(cur.map, thres.hmm)) + } + ##### More than 3 markers #### + ##Starting sequential algorithm for maps with more than 3 markers + ## First step: test all possible phase configurations under + ## a 'thres.twopt' threshold for a sequence of size 'start.set' + if(start.set > length(input.seq$seq.num)) + start.set <- length(input.seq$seq.num) + if(verbose) cat(start.set, " markers...\n", sep = "") + cte <- 0 + rf.temp <- c(Inf, Inf) + while(any(rf.temp >= sub.map.size.diff.limit)) + { + cte <- cte+1 + if(length(rf.temp) == 1) { + ## cat("Impossible build a map using the given thresholds\n") + ## stop() + stop("Impossible build a map using the given thresholds\n") + } + if(verbose) cat(cli::symbol$bullet, " Trying sequence:", cte:(start.set+cte-1), ":\n") + cur.seq <- make_seq_mappoly(input.obj = get(input.seq$data.name, pos = 1), as.vector(na.omit(input.seq$seq.num[cte:(start.set+cte-1)])), data.name = input.seq$data.name) + input.ph <- ls_linkage_phases(input.seq = cur.seq, + thres = thres.twopt, + twopt = twopt) + cur.map <- est_rf_hmm(input.seq = cur.seq, thres = thres.twopt, + twopt = twopt, tol = tol, high.prec = high.prec, + verbose = verbose, input.ph = input.ph, + reestimate.single.ph.configuration = reestimate.single.ph.configuration) + cur.map <- filter_map_at_hmm_thres(cur.map, thres.hmm) + cur.map <- reest_rf(cur.map, tol = tol.final, verbose = FALSE) + rf.temp <- imf_h(cur.map$maps[[1]]$seq.rf) + } + if(verbose) + msg("Done with initial sequence", line = 2) + if(start.set >= length(input.seq$seq.num)){ + if(verbose) cat("\nDone phasing", cur.map$info$n.mrk, "markers\nReestimating final recombination fractions.") + return(reest_rf(cur.map, tol = tol.final)) + } + ##### More than 'start.set' markers ##### + ## For maps with more markers than 'start.set', include + ## next makers in a sequential fashion + ct <- start.set+cte + all.ph <- update_ph_list_at_hmm_thres(cur.map, thres.hmm) + if(sub.map.size.diff.limit != Inf & !reestimate.single.ph.configuration){ + if (verbose) message("Making 'reestimate.single.ph.configuration = TRUE' to use map expansion") + reestimate.single.ph.configuration <- TRUE + } + while(ct <= length(input.seq$seq.num)) + { + ## Number of multipoint phases evaluated in the + ## previous round of marker insertion + hmm.phase.number <- length(cur.map$maps) + ## Get the map tail containing sufficient markers to + ## distinguish among the ploidy*2 possible homologs. + if(info.tail) + tail.temp <- get_full_info_tail(input.obj = cur.map) + input.ph <- NULL + ## for each linkage phase inherited from HMM + ## analysis from previous round, evaluate the + ## possible linkage phases for the marker inserted + ## in the current round using two-point information + ## under the threshold of thres.twopt + for(i in 1:length(all.ph$config.to.test)) + { + seq.num <- NULL + if(info.tail) + seq.num <- tail.temp$maps[[i]]$seq.num + if(length(seq.num) < extend.tail) + seq.num <- tail(cur.map$maps[[i]]$seq.num, extend.tail) + ## If the tail do not contain the marker responsible for carrying + ## multiple linkage phases through the rounds of insertion, + ## extend the tail so it contains that marker + if(max(check_ls_phase(all.ph)) >= length(seq.num)){ + seq.num <- tail(cur.map$maps[[i]]$seq.num, max(check_ls_phase(all.ph)) + start.set) + } + seq.cur <- make_seq_mappoly(input.obj = get(input.seq$data.name, pos = 1), + arg = seq.num, + data.name = input.seq$data.name) + all.ph.temp <- get_ph_list_subset(all.ph, seq.num, i) + input.ph.temp <- ls_linkage_phases(input.seq = seq.cur, + thres = thres.twopt, + twopt = twopt, + mrk.to.add = input.seq$seq.num[ct], + prev.info = all.ph.temp) + input.ph <- concatenate_ph_list(ph.list.1 = input.ph, ph.list.2 = input.ph.temp) + } + ## This is the number of linkage phases to be tested + ## combining HMM phases from previous round of mrk + ## insertion and the linkage phases from the two-point + ## information obtained in the current round + twopt.phase.number <- length(input.ph$config.to.test) + ## If this number is higher than phase.number.limit, + ## proceed to the next iteration + if(length(input.ph$config.to.test) > phase.number.limit) { + if(verbose) + cat(crayon::italic$yellow(paste0(ct ,": not included (*linkage phases*)\n", sep = ""))) + ct <- ct + 1 + next() + } + ## Appending the marker to the numeric + ## sequence and making a new sequence + seq.num <- c(seq.num, input.seq$seq.num[ct]) + seq.cur <- make_seq_mappoly(input.obj = get(input.seq$data.name, pos = 1), + arg = seq.num, + data.name = input.seq$data.name) + if(verbose) + cat_phase(input.seq, input.ph, all.ph, ct, seq.num, twopt.phase.number, hmm.phase.number) + ## Evaluation all maps using HMM + cur.map.temp <- est_rf_hmm(input.seq = seq.cur, + input.ph = input.ph, + twopt = twopt, + tol = tol, + verbose = FALSE, + reestimate.single.ph.configuration = reestimate.single.ph.configuration, + high.prec = high.prec) + cur.map.temp$maps <- unique(cur.map.temp$maps) + ## Filtering linkage phase configurations under a HMM LOD threshold + cur.map.temp <- filter_map_at_hmm_thres(cur.map.temp, thres.hmm) + + ## Gathering linkage phases of the current map, excluding the marker inserted + ## in the current round + ph.new <- lapply(cur.map.temp$maps, function(x) list(P = head(x$seq.ph$P, -1), + Q = head(x$seq.ph$Q, -1))) + ## Gathering linkage phases of the previous map, excluding the marker inserted + ## in the current round + ph.old <- lapply(cur.map$maps, function(x, id) list(P = x$seq.ph$P[id], + Q = x$seq.ph$Q[id]), id = names(ph.new[[1]]$P)) + + ## Check in which whole phase configurations the new + ## HMM tail should be appended + MQ <- MP <- matrix(NA, length(ph.old), length(ph.new)) + for(j1 in 1:nrow(MP)){ + for(j2 in 1:ncol(MP)){ + MP[j1, j2] <- identical(ph.old[[j1]]$P, ph.new[[j2]]$P) + MQ[j1, j2] <- identical(ph.old[[j1]]$Q, ph.new[[j2]]$Q) + } + } + M <- which(MP & MQ, arr.ind = TRUE) + colnames(M) <- c("old", "new") + ## Checking for quality (map size and LOD Score) + submap.length.old <- sapply(cur.map$maps, function(x) sum(imf_h(x$seq.rf)))[M[,1]] + last.dist.old <- sapply(cur.map$maps, function(x) tail(imf_h(x$seq.rf),1))[M[,1]] + submap.length.new <- sapply(cur.map.temp$maps, function(x) sum(imf_h(x$seq.rf)))[M[,2]] + last.dist.new <- sapply(cur.map.temp$maps, function(x) tail(imf_h(x$seq.rf),1))[M[,2]] + last.mrk.expansion <- submap.expansion <- numeric(nrow(M)) + submap.expansion <- submap.length.new - submap.length.old + last.mrk.expansion <- last.dist.new - last.dist.old + + if(length(M[,2]) == 0) { + if(verbose) cat(crayon::italic$yellow(paste0(ct ,": not included (~map extension~)\n", sep = ""))) + ct <- ct + 1 + next() + } + + cur.map.temp$maps <- cur.map.temp$maps[M[,2]] + LOD <- get_LOD(cur.map.temp, sorted = FALSE) + if(sub.map.size.diff.limit != Inf){ + selected.map <- submap.expansion < sub.map.size.diff.limit & LOD < thres.hmm + if(detailed.verbose){ + x <- round(cbind(submap.length.new, submap.expansion, last.mrk.expansion),2) + for(j1 in 1:nrow(x)){ + cat(" ", x[j1,1], ": (", x[j1,2], "/", x[j1,3],")", sep = "") + if(j1 != nrow(x)) cat("\n") + } + if(all(!selected.map)) cat(paste0(crayon::red(cli::symbol$cross), "\n")) + else cat(paste0(crayon::green(cli::symbol$tick), "\n")) + } + if(all(!selected.map)){ + if(verbose) cat(crayon::italic$yellow(paste0(ct ,": not included (~map extension~)\n", sep = ""))) + ct <- ct + 1 + next() + } + } else { selected.map <- LOD < thres.hmm } + all.ph.temp <- update_ph_list_at_hmm_thres(cur.map.temp, Inf) + id <- which(selected.map & which(!sapply(cur.map.temp$maps, is.null))) + if(length(id) == 0){ + if(verbose) cat(crayon::italic$yellow(paste0(ct ,": not included (-linkage phases-)\n", sep = ""))) + ct <- ct + 1 + next() + } + cur.map.temp$maps <- cur.map.temp$maps[id] + if(any(unique(M[id,2,drop = FALSE]) > length(all.ph.temp$config.to.test))){ + if(verbose) cat(crayon::italic$yellow(paste0(ct ,": not included (-linkage phases-)\n", sep = ""))) + ct <- ct + 1 + next() + } + if(length(id) > nrow(M)) + all.ph.temp <- add_mrk_at_tail_ph_list(all.ph, all.ph.temp, M[,,drop = FALSE]) + else if(length(id) == nrow(M)){ + M[,2] <- id + all.ph.temp <- add_mrk_at_tail_ph_list(all.ph, all.ph.temp, M[,,drop = FALSE]) + } else { + all.ph.temp <- add_mrk_at_tail_ph_list(all.ph, all.ph.temp, M[id,,drop = FALSE]) + } + all.ph <- all.ph.temp + cur.map <- cur.map.temp + ct <- ct + 1 + } + ##### Re-estimating final map #### + ## Re-estimating final map with higher tolerance + seq.final <- make_seq_mappoly(input.obj = get(input.seq$data.name, pos = 1), + arg = as.numeric(names(all.ph$config.to.test[[1]]$P)), + data.name = input.seq$data.name) + #msg(paste("Done phasing", length(seq.final$seq.num), "markers")) + if (verbose){ + msg("Reestimating final recombination fractions", line = 2) + cat("") + } + final.map <- est_rf_hmm(input.seq = seq.final, + input.ph = all.ph, + twopt = twopt, + tol = tol.final, + verbose = FALSE, + reestimate.single.ph.configuration = TRUE, + high.prec = TRUE) + if(verbose) { + #cat("\n------------------------------------------") + cat("Markers in the initial sequence: ", length(input.seq$seq.num), sep = "") + cat("\nMapped markers : ", final.map$info$n.mrk, " (", + round(100*final.map$info$n.mrk/length(input.seq$seq.num),1) ,"%)\n", sep = "") + msg("", line = 2) + } + return(final.map) +} + +#' @rdname est_rf_hmm +#' @export +print.mappoly.map <- function(x, detailed = FALSE, ...) { + cat("This is an object of class 'mappoly.map'\n") + cat(" Ploidy level:\t", x$info$ploidy, "\n") + cat(" No. individuals:\t", get(x$info$data.name)$n.ind, "\n") + cat(" No. markers:\t", x$info$n.mrk, "\n") + cat(" No. linkage phases:\t", length(x$maps), "\n") + l <- sapply(x$maps, function(x) x$loglike) + LOD <- round(l - max(l), 2) + if(detailed) + { + for (j in 1:length(x$maps)) { + cat("\n ---------------------------------------------\n") + cat(" Linkage phase configuration: ", j) + cat("\n log-likelihood:\t", x$map[[j]]$loglike) + cat("\n LOD:\t\t", format(round(LOD[j], 2), digits = 2)) + cat("\n\n") + M <- matrix("|", x$info$n.mrk, x$info$ploidy * 2) + M <- cbind(M, " ") + M <- cbind(M, format(round(cumsum(c(0, imf_h(x$maps[[j]]$seq.rf))), 1), digits = 1)) + for (i in 1:x$info$n.mrk) { + if (all(x$maps[[j]]$seq.ph$Q[[i]] != 0)) + M[i, c(x$maps[[j]]$seq.ph$P[[i]], x$maps[[j]]$seq.ph$Q[[i]] + x$info$ploidy)] <- "o" else M[i, x$maps[[j]]$seq.ph$P[[i]]] <- "o" + } + M <- cbind(get(x$info$data.name)$mrk.names[x$maps[[j]]$seq.num], M) + big.name <- max(nchar(M[,1])) + format_name <- function(y, big.name){ + paste0(y, paste0(rep(" ", big.name-nchar(y)), collapse = "")) + } + M <- rbind(c("", letters[1:(ncol(M)-3)], "", ""), M) + format(apply(M, 1, function(y) cat(c("\t", format_name(y[1], big.name), "\t", y[2:(x$info$ploidy + 1)], rep(" ", 4), y[(x$info$ploidy + 2):(x$info$ploidy * 2 + 3)], "\n"), collapse = ""))) + } + } else { + cat("\n ---------------------------------------------\n") + cat(" Number of linkage phase configurations: ", length(x$maps)) + cat("\n ---------------------------------------------\n") + for (j in 1:length(x$maps)) { + cat(" Linkage phase configuration: ", j) + cat("\n map length:\t", format(round(sum(imf_h(x$map[[j]]$seq.rf)), 2))) + cat("\n log-likelihood:\t", format(round(x$map[[j]]$loglike, 2))) + cat("\n LOD:\t\t", format(round(LOD[j], 2), digits = 2)) + cat("\n ~~~~~~~~~~~~~~~~~~\n") + } + } +} + +#' @rdname est_rf_hmm +#' @importFrom grDevices rgb +#' @importFrom graphics rect +#' @export +plot.mappoly.map <- function(x, left.lim = 0, right.lim = Inf, + phase = TRUE, mrk.names = FALSE, + cex = 1, config = "best", P = "Parent 1", + Q = "Parent 2", xlim = NULL, ...) { + oldpar <- par(no.readonly = TRUE) + on.exit(par(oldpar)) + if(phase){ + map.info <- prepare_map(x, config) + if(any(map.info$ph.p == "B")){ + var.col <- c("black", "darkgray") + var.col <- var.col[1:2] + names(var.col) <- c("A", "B") + } else { + var.col <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3") + names(var.col) <- c("A", "T", "C", "G") + } + ploidy <- map.info$ploidy + if(ploidy == 2) { + d.col <- c(NA, "#1B9E77", "#D95F02") + }else if(ploidy == 4){ + d.col <- c(NA, "#1B9E77", "#D95F02", "#7570B3", "#E7298A") + }else if(ploidy == 6){ + d.col <- c(NA, "#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02") + }else if(ploidy == 8){ + d.col <- c(NA, "#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D", "#666666") + } else d.col <- c(NA, gg_color_hue(ploidy)) + names(d.col) <- 0:ploidy + d.col[1] <- NA + x <- map.info$map + lab <- names(x) + zy <- seq(0, 0.5, length.out = ploidy) + 1.5 + pp <- map.info$ph.p + pq <- map.info$ph.q + dp <- map.info$dp + dq <- map.info$dq + x1 <- abs(left.lim - x) + x2 <- abs(right.lim - x) + id.left <- which(x1 == min(x1))[1] + id.right <- rev(which(x2 == min(x2)))[1] + par(mai = c(1,0.15,0,0), mar = c(4.5,2,1,2)) + curx <- x[id.left:id.right] + layout(mat = matrix(c(4,2,3, 1), ncol = 2), heights = c(2, 10), widths = c(1, 10)) + if(is.null(xlim)) + xlim <- range(curx) + plot(x = curx, + y = rep(.5,length(curx)), + type = "n" , + ylim = c(.25, 4.5), + axes = FALSE, + xlab = "Distance (cM)", + ylab = "", + xlim = xlim) + lines(c(x[id.left], x[id.right]), c(.5, .5), lwd = 15, col = "gray") + points(x = curx, + y = rep(.5,length(curx)), + xlab = "", ylab = "", + pch = "|", cex = 1.5, + ylim = c(0,2)) + axis(side = 1) + diplocol1 <- c("#4DAF4A", "#984EA3") + diplocol2 <- c("#E41A1C", "#377EB8") + #Parent 2 + x1 <- seq(x[id.left], x[id.right], length.out = length(curx)) + x.control <- diff(x1[1:2])/2 + if(length(x1) < 150) + x.control <- x.control * .8 + if(length(x1) < 100) + x.control <- x.control * .8 + if(length(x1) < 75) + x.control <- x.control * .8 + if(length(x1) < 50) + x.control <- x.control * .8 + if(length(x1) < 25) + x.control <- x.control * .8 + for(i in 1:ploidy) + { + if(ploidy == 2 && any(map.info$ph.p == "B")){ + lines(range(x1), c(zy[i], zy[i]), lwd = 15, col = diplocol1[i]) + } else { + lines(range(x1), c(zy[i], zy[i]), lwd = 8, col = "gray") + } + y1 <- rep(zy[i], length(curx)) + pal <- var.col[pq[id.left:id.right,i]] + rect(xleft = x1 - x.control, ybottom = y1 -.05, xright = x1 + x.control, ytop = y1 +.05, col = pal, border = NA) + } + #connecting allelic variants to markers + for(i in 1:length(x1)) + lines(c(curx[i], x1[i]), c(0.575, zy[1]-.05), lwd = 0.2) + points(x = x1, + y = zy[ploidy]+0.05+dq[id.left:id.right]/20, + col = d.col[as.character(dq[id.left:id.right])], + pch = 19, cex = .7) + #Parent 1 + zy <- zy + 1.1 + for(i in 1:ploidy) + { + if(ploidy == 2 && any(map.info$ph.p == "B")){ + lines(range(x1), c(zy[i], zy[i]), lwd = 12, col = diplocol2[i]) + } else { + lines(range(x1), c(zy[i], zy[i]), lwd = 8, col = "gray") + } + y1 <- rep(zy[i], length(curx)) + pal <- var.col[pp[id.left:id.right,i]] + rect(xleft = x1 - x.control, ybottom = y1 -.05, xright = x1 + x.control, ytop = y1 +.05, col = pal, border = NA) + } + points(x = x1, + y = zy[ploidy]+0.05+dp[id.left:id.right]/20, + col = d.col[as.character(dp[id.left:id.right])], + pch = 19, cex = .7) + if(mrk.names) + text(x = x1, + y = rep(zy[ploidy]+0.05+.3, length(curx)), + labels = names(curx), + srt = 90, adj = 0, cex = cex) + par(mar = c(4.5,1,1,0), xpd = TRUE) + plot(x = 0, + y = 0, + type = "n" , + axes = FALSE, + ylab = "", + xlab = "", + ylim = c(.25, 4.5)) + zy <- zy - 1.1 + mtext(text = Q, side = 4, at = mean(zy), line = -1, font = 4) + for(i in 1:ploidy) + mtext(letters[(ploidy+1):(2*ploidy)][i], line = 0, at = zy[i], side = 4) + zy <- zy + 1.1 + mtext(text = P, side = 4, at = mean(zy), line = -1, font = 4) + for(i in 1:ploidy) + mtext(letters[1:ploidy][i], line = 0, at = zy[i], side = 4) + par(mar = c(0,1,2,4), xpd = FALSE) + plot(x = curx, + y = rep(.5,length(curx)), + type = "n" , + axes = FALSE, + xlab = "", + ylab = "") + if(any(map.info$ph.p == "B")){ + legend("bottomleft", legend = c("A", "B"), + fill = c(var.col), title = "Variants", + box.lty = 0, bg = "transparent", ncol = 2) + } else { + legend("bottomleft", legend = c("A", "T", "C", "G", "-"), + fill = c(var.col, "white"), title = "Nucleotides", + box.lty = 0, bg = "transparent", ncol = 4) + } + legend("bottomright", legend = names(d.col)[-1], title = "Doses" , + col = d.col[-1], ncol = ploidy/2, pch = 19, + box.lty = 0) + } else { + plot_map_list(x) + } +} + +#' prepare maps for plot +#' @keywords internal +prepare_map <- function(input.map, config = "best"){ + if (!inherits(input.map, "mappoly.map")) { + stop(deparse(substitute(input.map)), " is not an object of class 'mappoly.map'") + } + ## Choosing the linkage phase configuration + LOD.conf <- get_LOD(input.map, sorted = FALSE) + if(config == "best") { + i.lpc <- which.min(LOD.conf) + } else if(config == "all"){ + i.lpc <- seq_along(LOD.conf) } else if (config > length(LOD.conf)) { + stop("invalid linkage phase configuration") + } else i.lpc <- config + ## Gathering marker positions + map <- cumsum(imf_h(c(0, input.map$maps[[i.lpc]]$seq.rf))) + names(map) <- input.map$info$mrk.names + ## + ph.p <- ph_list_to_matrix(input.map$maps[[i.lpc]]$seq.ph$P, input.map$info$ploidy) + ph.q <- ph_list_to_matrix(input.map$maps[[i.lpc]]$seq.ph$Q, input.map$info$ploidy) + dimnames(ph.p) <- list(names(map), letters[1:input.map$info$ploidy]) + dimnames(ph.q) <- list(names(map), letters[(1+input.map$info$ploidy):(2*input.map$info$ploidy)]) + if(is.null(input.map$info$seq.alt)) + { + ph.p[ph.p == 1] <- ph.q[ph.q == 1] <- "A" + ph.p[ph.p == 0] <- ph.q[ph.q == 0] <- "B" + } else { + for(i in input.map$info$mrk.names){ + ph.p[i, ph.p[i,] == 1] <- input.map$info$seq.alt[i] + ph.p[i, ph.p[i,] == 0] <- input.map$info$seq.ref[i] + ph.q[i, ph.q[i,] == 1] <- input.map$info$seq.alt[i] + ph.q[i, ph.q[i,] == 0] <- input.map$info$seq.ref[i] + } + } + dp <- input.map$info$seq.dose.p1 + dq <- input.map$info$seq.dose.p2 + list(ploidy = input.map$info$ploidy, map = map, ph.p = ph.p, ph.q = ph.q, dp = dp, dq = dq) +} + +#' Get the tail of a marker sequence up to the point where the markers +#' provide no additional information. +#' @keywords internal +get_full_info_tail <- function(input.obj, extend = NULL) { + ## checking for correct object + if(!inherits(input.obj, "mappoly.map")) + stop(deparse(substitute(input.obj)), " is not an object of class 'mappoly.map''") + if (!is.null(extend)) + if (extend > input.obj$info$n.mrk) + return(input.obj) + ploidy <- input.obj$info$ploidy + i <- ploidy/2 + w1 <- ph_list_to_matrix(input.obj$maps[[1]]$seq.ph$P, ploidy) + max1 <- length(unique(apply(w1, 2, paste0, collapse = ""))) + w2 <- ph_list_to_matrix(input.obj$maps[[1]]$seq.ph$Q, ploidy) + max2 <- length(unique(apply(w2, 2, paste0, collapse = ""))) + while (i < input.obj$info$n.mrk) { + wp <- ph_list_to_matrix(tail(input.obj$maps[[1]]$seq.ph$P, i), ploidy) + xp <- apply(wp, 2, paste, collapse = "-") + wq <- ph_list_to_matrix(tail(input.obj$maps[[1]]$seq.ph$Q, i), ploidy) + xq <- apply(wq, 2, paste, collapse = "-") + if (length(unique(xp)) == max1 && length(unique(xq)) == max2) + break() + i <- i + 1 + } + if (!is.null(extend)) + if (i < extend) + i <- extend + input.obj$info$n.mrk <- i + + + + input.obj$info$seq.num <- tail(input.obj$info$seq.num, i) + input.obj$info$mrk.names <- tail(input.obj$info$mrk.names, i) + input.obj$info$seq.dose.p1 <- tail(input.obj$info$seq.dose.p1, i) + input.obj$info$seq.dose.p2 <- tail(input.obj$info$seq.dose.p2, i) + input.obj$info$chrom <- tail(input.obj$info$chrom, i) + input.obj$info$genome.pos <- tail(input.obj$info$genome.pos, i) + input.obj$info$chisq.pval <- tail(input.obj$info$chisq.pval, i) + + for (j in 1:length(input.obj$maps)) { + input.obj$maps[[j]]$loglike <- 0 + input.obj$maps[[j]]$seq.ph$P <- tail(input.obj$maps[[j]]$seq.ph$P, n = i) + input.obj$maps[[j]]$seq.ph$Q <- tail(input.obj$maps[[j]]$seq.ph$Q, n = i) + input.obj$maps[[j]]$seq.num <- tail(input.obj$maps[[j]]$seq.num, n = i) + input.obj$maps[[j]]$seq.rf <- tail(input.obj$maps[[j]]$seq.rf, n = (i - 1)) + } + return(input.obj) +} + +#' Filter MAPpoly Map Configurations by Loglikelihood Threshold +#' +#' This function filters configurations within a `"mappoly.map"` object based on a specified +#' log-likelihood threshold. +#' +#' @param map An object of class `"mappoly.map"`, which may contain several maps +#' with different linkage phase configurations and their respective log-likelihoods. +#' @param thres.hmm The threshold for filtering configurations. +#' +#' @return Returns the modified `"mappoly.map"` object with configurations filtered +#' based on the log-likelihood threshold. +#' +#' @keywords internal +#' @export +filter_map_at_hmm_thres <- function(map, thres.hmm){ + if (!inherits(map, "mappoly.map")) { + stop("Input 'map' is not an object of class 'mappoly.map'") + } + + map$info$ph.thresh <- NULL + map$maps <- map$maps[get_LOD(map, sorted = FALSE) < thres.hmm] + return(map) +} + +#' makes a phase list from map, selecting only +#' configurations under a certain threshold +#' @keywords internal +update_ph_list_at_hmm_thres <- function(map, thres.hmm){ + temp.map <- filter_map_at_hmm_thres(map, thres.hmm) + config.to.test <- lapply(temp.map$maps, function(x) x$seq.ph) + structure(list(config.to.test = config.to.test, + ploidy = map$info$ploidy, seq.num = map$maps[[1]]$seq.num, + thres = map$info$ph.thresh, data.name = map$info$data.name, + thres.hmm = thres.hmm), + class = "two.pts.linkage.phases") +} + +#' subset of a linkage phase list +#' @keywords internal +get_ph_list_subset <- function(ph.list, seq.num, conf){ + config.to.test <- list(lapply(ph.list$config.to.test[[conf]], function(x, seq.num) x[as.character(seq.num)], seq.num)) + structure(list(config.to.test = config.to.test, + ploidy = ph.list$ploidy, seq.num = ph.list$seq.num, + thres = ph.list$ph.thresh, data.name = ph.list$data.name, + thres.hmm = ph.list$thres.hmm), + class = "two.pts.linkage.phases") +} + +#' concatenate two linkage phase lists +#' @keywords internal +concatenate_ph_list <- function(ph.list.1, ph.list.2){ + if(length(ph.list.1) == 0) + return(ph.list.2) + config.to.test <- c(ph.list.1$config.to.test, ph.list.2$config.to.test) + id <- which(!duplicated(config.to.test)) + config.to.test <- config.to.test[id] + structure(list(config.to.test = config.to.test, + ploidy = ph.list.1$ploidy, seq.num = ph.list.1$seq.num, + thres = ph.list.1$ph.thresh, data.name = ph.list.1$data.name, + thres.hmm = ph.list.1$thres.hmm), + class = "two.pts.linkage.phases") +} + +#' add a single marker at the tail of a linkage phase list +#' @keywords internal +add_mrk_at_tail_ph_list <- function(ph.list.1, ph.list.2, cor.index){ + config.to.test <- vector("list", length = nrow(cor.index)) + for(i in 1:nrow(cor.index)){ + config.to.test[[i]] <- list(P = c(ph.list.1$config.to.test[[cor.index[i,1]]]$P, tail(ph.list.2$config.to.test[[cor.index[i,2]]]$P,1)), + Q = c(ph.list.1$config.to.test[[cor.index[i,1]]]$Q, tail(ph.list.2$config.to.test[[cor.index[i,2]]]$Q,1))) + } + structure(list(config.to.test = config.to.test, + ploidy = ph.list.1$ploidy, seq.num = ph.list.1$seq.num, + thres = ph.list.1$ph.thresh, data.name = ph.list.1$data.name, + thres.hmm = ph.list.1$thres.hmm), + class = "two.pts.linkage.phases") +} + +#' Compare a list of linkage phases and return the +#' markers for which they are different. +#' @keywords internal +check_ls_phase <- function(ph){ + if(length(ph$config.to.test) == 1) return(0) + id <- rep(1, length(ph$config.to.test[[1]]$P)) + for(i in 1:length(ph$config.to.test[[1]]$P)){ + w <- ph$config.to.test[[1]]$P[i] + for(j in 2:length(ph$config.to.test)) + id[i] <- id[i] + identical(w, ph$config.to.test[[j]]$P[i]) + } + names(id) <- names((ph$config.to.test[[1]]$P)) + w <- length(ph$config.to.test[[1]]$P) - which(id < length(ph$config.to.test)) + if(length(w) == 0) return(0) + w +} + +#' cat for graphical representation of the phases +#' @keywords internal +print_ph <- function(input.ph){ + phs.P <- lapply(input.ph$config.to.test, + function(x, ploidy) { + M <- matrix("|", nrow = 1, ncol = ploidy) + M[unlist(tail(x$P, 1))] <- crayon::red(cli::symbol$bullet) + paste(M, collapse = "")}, + ploidy = input.ph$ploidy) + phs.Q <- lapply(input.ph$config.to.test, + function(x, ploidy) { + M <- matrix("|", nrow = 1, ncol = ploidy) + M[unlist(tail(x$Q, 1))] <- crayon::cyan(cli::symbol$bullet) + paste(M, collapse = "")}, + ploidy = input.ph$ploidy) + if(length(phs.P) == 1) + return(paste(unlist(phs.P)[1], unlist(phs.Q)[1], " ")) + if(length(phs.P) == 2) + return(paste(unlist(phs.P)[1], unlist(phs.Q)[1], " ", unlist(phs.P)[2], unlist(phs.Q)[2])) + if(length(phs.P) > 2) + return(paste(unlist(phs.P)[1], unlist(phs.Q)[1], " ... ", unlist(phs.P)[2], unlist(phs.Q)[2])) +} + +#' cat for phase information +#' @keywords internal +cat_phase <- function(input.seq, + input.ph, + all.ph, + ct, + seq.num, + twopt.phase.number, + hmm.phase.number){ + pc <- round(100*ct/length(input.seq$seq.num),1) + xmax11 <- nchar(length(input.seq$seq.num)) + x11 <- ct + x11 <- paste0(x11, paste(rep(" ", xmax11-nchar(x11)), collapse = "")) + x12 <- length(all.ph$config.to.test[[1]]$P) + 1 + x12 <- paste0(x12, paste(rep(" ", xmax11-nchar(x12)), collapse = "")) + x13 <- pc + x13 <- paste0(":(",pc,"%)", paste(rep(" ", 5-nchar(x13)), collapse = "")) + x1 <- paste0(x12,"/",x11, x13) + xmax21 <- nchar(max(input.seq$seq.num)) + x21 <- input.seq$seq.num[ct] + x21 <- paste0(x21, paste(rep(" ", xmax21-nchar(x21)), collapse = "")) + x22 <- length(input.ph$config.to.test) + x22 <- paste0(x22, " ph ", paste(rep(" ", 5-nchar(x22)), collapse = "")) + x23 <- paste0("(", hmm.phase.number, "/", twopt.phase.number,") ") + x23 <- paste0(x23, paste(rep(" ", 10-nchar(x23)), collapse = "")) + x2 <- paste0( x21, ": ", x22, x23) + x31 <- length(seq.num)-1 + x31 <- paste0(x31, paste(rep(" ", xmax11-nchar(x31)), collapse = "")) + x3 <- paste0(" -- tail: ", x31) + cat(x1, x2, x3, print_ph(input.ph), "\n") +} diff --git a/R/filters.R b/R/filters.R index 26efc804..700e4259 100644 --- a/R/filters.R +++ b/R/filters.R @@ -1,8 +1,5 @@ #' Filter non-conforming classes in F1, non double reduced population. -#' -#' @param void internal function to be documented #' @keywords internal -#' @export filter_non_conforming_classes <- function(input.data, prob.thres = NULL) { if (!inherits(input.data, "mappoly.data")) { @@ -64,22 +61,25 @@ filter_non_conforming_classes <- function(input.data, prob.thres = NULL) #' Filter missing genotypes #' -#' Excludes markers or individuals based on their proportion of missing data +#' Excludes markers or individuals based on their proportion of missing data. #' -#' @param input.data an object of class \code{mappoly.data} +#' @param input.data an object of class \code{mappoly.data}. #' #' @param type one of the following options: -#' \code{'marker'}{filter out markers based on their percentage of missing data (default)} -#' \code{'individual'}{filter out individuals based on their percentage of missing data} +#' \enumerate{ +#' \item \code{"marker"}: filter out markers based on their percentage of missing data (default). +#' \item \code{"individual"}: filter out individuals based on their percentage of missing data. +#' } #' Please notice that removing individuals with certain amount of data can change some marker parameters #' (such as depth), and can also change the estimated genotypes for other individuals. -#' So be careful when removing individuals. +#' So, be careful when removing individuals. #' -#' @param filter.thres maximum percentage of missing data (default = 0.2) +#' @param filter.thres maximum percentage of missing data (default = 0.2). #' -#' @param inter if \code{TRUE}, expects user-input to proceed with filtering +#' @param inter if \code{TRUE}, expects user-input to proceed with filtering. +#' +#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu}. #' -#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} #' @examples #' plot(tetra.solcap) #' dat.filt.mrk <- filter_missing(input.data = tetra.solcap, @@ -87,6 +87,7 @@ filter_non_conforming_classes <- function(input.data, prob.thres = NULL) #' filter.thres = 0.1, #' inter = TRUE) #' plot(dat.filt.mrk) +#' #' @export #' @importFrom magrittr "%>%" #' @importFrom dplyr filter diff --git a/R/get_counts.R b/R/get_counts.R index a436a157..7ccdd3c4 100755 --- a/R/get_counts.R +++ b/R/get_counts.R @@ -20,8 +20,6 @@ get_counts_single_parent <- function(ploidy, gen.par.mk1, gen.par.mk2, gen.prog. } #' Counts for recombinant classes -#' -#' @param void internal function to be documented #' @keywords internal get_counts_two_parents <- function(x = c(2, 2), ploidy, p.k, p.k1, q.k, q.k1, verbose = FALSE, joint.prob = FALSE) { gen.prog.mk1 <- x[1] @@ -74,8 +72,6 @@ get_counts_two_parents <- function(x = c(2, 2), ploidy, p.k, p.k1, q.k, q.k1, ve } #' Counts for recombinant classes -#' -#' @param void internal function to be documented #' @keywords internal get_counts <- function(ploidy, P.k = NULL, P.k1 = NULL, Q.k = NULL, Q.k1 = NULL, verbose = FALSE, make.names = FALSE, joint.prob = FALSE) { if (verbose) { @@ -133,8 +129,6 @@ get_counts <- function(ploidy, P.k = NULL, P.k1 = NULL, Q.k = NULL, Q.k1 = NULL, #' associated names in the matrices indicates the number of shared #' homologous chromosomes. The row names indicates the dosage in loci #' k and k+1 respectively -#' -#' @param void internal function to be documented #' @keywords internal get_counts_all_phases <- function(x, ploidy, verbose = FALSE, make.names = FALSE, joint.prob = FALSE) { pk <- x[1] diff --git a/R/get_counts_from_web.R b/R/get_counts_from_web.R index da02b2ef..6ffac9da 100755 --- a/R/get_counts_from_web.R +++ b/R/get_counts_from_web.R @@ -1,9 +1,6 @@ #' Access a remote server to get Counts for recombinant classes -#' -#' @param void internal function to be documented #' @keywords internal #' @import RCurl -#' @export get_cache_two_pts_from_web get_cache_two_pts_from_web <- function(ploidy, url.address = NULL, joint.prob = TRUE, verbose = FALSE) { if (is.null(url.address)) { if (ploidy == 2) diff --git a/R/haplotype_map_utils.R b/R/haplotype_map_utils.R index fc3c6c32..98374fd3 100644 --- a/R/haplotype_map_utils.R +++ b/R/haplotype_map_utils.R @@ -122,8 +122,6 @@ generate_all_link_phases_elim_equivalent_haplo <- } #' Estimate a genetic map given a sequence of block markers -#' -#' @param void internal function to be documented #' @keywords internal est_haplo_hmm <- function(ploidy, n.mrk, n.ind, haplo, emit = NULL, @@ -179,8 +177,6 @@ est_haplo_hmm <- #' Estimate a genetic map given a sequence of block markers #' given the conditional probabilities of the genotypes -#' -#' @param void internal function to be documented #' @keywords internal est_map_haplo_given_genoprob <- function(map.list, genoprob.list, @@ -240,8 +236,6 @@ est_map_haplo_given_genoprob <- function(map.list, #' Compute conditional probabilities of the genotypes given a sequence #' of block markers -#' -#' @param void internal function to be documented #' @keywords internal calc_genoprob_haplo <- function(ploidy, n.mrk, n.ind, haplo, emit = NULL, rf_vec, ind.names, verbose = TRUE, diff --git a/R/make_seq.R b/R/make_seq.R index 22c1b61c..7e04c93a 100755 --- a/R/make_seq.R +++ b/R/make_seq.R @@ -1,373 +1,353 @@ -#' Create a sequence of markers -#' -#' Makes a sequence of markers based on an object of another class. -#' -#' @param input.obj an object of one of the following classes: -#' \code{mappoly.data}, \code{mappoly.map}, \code{mappoly.sequence}, -#' \code{mappoly.group}, \code{mappoly.unique.seq}, -#' \code{mappoly.pcmap}, \code{mappoly.pcmap3d}, \code{mappoly.geno.ord} or -#' \code{mappoly.edit.order} -#' -#' @param arg can be one of the following objects: i) a string 'all', -#' resulting in a sequence with all markers in the raw data; ii) a -#' string or a vector of strings \code{'seqx'}, where \code{x} -#' is the sequence (\code{x = 0} indicates unassigned markers); iii) a -#' \code{vector} of integers specifying which markers comprise the -#' sequence; iv) a \code{vector} of integers representing linkage group if -#' \code{input.object} has class \code{mappoly.group}; or v) NULL if -#' \code{input.object} has class \code{mappoly.pcmap}, \code{mappoly.pcmap3d}, -#' \code{mappoly.unique.seq}, or \code{mappoly.geno.ord} -#' -#' @param data.name name of the object of class \code{mappoly.data} -#' -#' @param info.parent one of the following options: -#' \code{'all'}{select all dosage combinations in both parents (default)} -#' \code{'P1'}{select informative markers parent 1} -#' \code{'P2'}{select informative markers parent 2} -#' -#' @param genomic.info optional argument applied to \code{mappoly.group} objects only. This argument can be \code{NULL}, -#' or can hold the numeric combination of sequences from genomic information to be used when making the sequences. -#' When \code{genomic.info = NULL} (default), the function returns a sequence containing all markers defined -#' by the grouping function. When \code{genomic.info = 1}, the function returns a sequence with markers -#' that matched the intersection between grouping function and genomic information, considering the sequence -#' from genomic information that holds the maximum number of markers matching the group; -#' when \code{genomic.info = c(1,2)}, the function returns a sequence with markers -#' that matched the intersection between grouping function and genomic information, considering two sequences -#' from genomic information that presented the maximum number of markers matching the group; and so on. -#' -#' @param x an object of the class \code{mappoly.sequence} -#' -#' @param ... currently ignored -#' -#' @return An object of class \code{mappoly.sequence}, which is a -#' list containing the following components: -#' \item{seq.num}{a \code{vector} containing the (ordered) indices -#' of markers in the sequence, according to the input file} -#' \item{seq.phases}{a \code{list} with the linkage phases between -#' markers in the sequence, in corresponding positions. \code{-1} -#' means that there are no defined linkage phases} -#' \item{seq.rf}{a \code{vector} with the recombination -#' frequencies between markers in the sequence. \code{-1} means -#' that there are no estimated recombination frequencies} -#' \item{loglike}{log-likelihood of the corresponding linkage -#' map} -#' \item{data.name}{name of the object of class -#' \code{mappoly.data} with the raw data} -#' \item{twopt}{name of the object of class \code{mappoly.twopt} -#' with the 2-point analyses. \code{-1} means that the twopt -#' estimates were not computed} -#' -#' @examples -#' all.mrk <- make_seq_mappoly(hexafake, 'all') -#' seq1.mrk <- make_seq_mappoly(hexafake, 'seq1') -#' plot(seq1.mrk) -#' some.mrk.pos <- c(1,4,28,32,45) -#' (some.mrk.1 <- make_seq_mappoly(hexafake, some.mrk.pos)) -#' plot(some.mrk.1) -#' -#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu}, with modifications by Gabriel Gesteira, \email{gdesiqu@ncsu.edu} -#' -#' @references -#' Mollinari, M., and Garcia, A. A. F. (2019) Linkage -#' analysis and haplotype phasing in experimental autopolyploid -#' populations with high ploidy level using hidden Markov -#' models, _G3: Genes, Genomes, Genetics_. -#' \doi{10.1534/g3.119.400378} -#' -#' @export - -make_seq_mappoly <- function(input.obj, - arg = NULL, - data.name = NULL, - info.parent = c('all', 'p1', 'p2'), - genomic.info = NULL) { - ## checking for correct object - input_classes <- c("mappoly.data", "mappoly.map", "mappoly.sequence", - "mappoly.unique.seq", "mappoly.pcmap", "mappoly.pcmap3d", - "mappoly.group", "mappoly.chitest.seq", "mappoly.geno.ord", - "mappoly.edit.order") - if (!inherits(input.obj, input_classes)) { - stop("invalid input object.", call. = FALSE) - } - ## if input object is a map, call 'make_seq_mappoly' recursively - info.parent <- match.arg(info.parent) - if(inherits (input.obj, "mappoly.map")) - return(make_seq_mappoly(get(input.obj$info$data.name, pos = 1), - arg = input.obj$info$mrk.names, - info.parent = info.parent, - data.name = input.obj$info$data.name)) - if(inherits (input.obj, "mappoly.sequence")){ - if(is.null(arg)) - arg = input.obj$seq.mrk.names - if(!is.character(arg)) - stop("provide marker names when using 'mappoly.sequence' as input object.", - call. = FALSE) - return(make_seq_mappoly(get(input.obj$data.name, pos = 1), - arg = arg, - info.parent = info.parent, - data.name = input.obj$data.name)) - } - ## checking for argument to make a sequence - if (is.null(arg) && !inherits(input.obj, "mappoly.chitest.seq") && - !inherits(input.obj, "mappoly.unique.seq") && - !inherits(input.obj, "mappoly.pcmap") && - !inherits(input.obj, "mappoly.pcmap3d") && - !inherits(input.obj, "mappoly.geno.ord") && - !inherits(input.obj, "mappoly.edit.order")) { - stop("argument 'arg' expected.") - } - ## Variables defined to block removing redundant markers - realkeep = FALSE - tokeep = FALSE - # ## Old code to handle redundant markers - # if ((!is.null(input.obj$kept))){ - # tokeep = input.obj$kept - # realkeep = TRUE - # seq.num = match(tokeep,input.obj$mrk.names) - # } - if (inherits(input.obj, "mappoly.data")) - { - chisq.pval <- input.obj$chisq.pval - chisq.pval.thres <- NULL - ## gathering sequence data - chrom <- genome.pos <- NULL - if (any(!is.na(input.obj$chrom))) - chrom <- input.obj$chrom - if (any(!is.na(input.obj$genome.pos))) - genome.pos <- input.obj$genome.pos - - ## make sequence with all markers - if (length(arg) == 1 && arg == "all") - { - if (realkeep) { - seq.num = match(tokeep,input.obj$mrk.names) - } else { - seq.num = as.integer(1:input.obj$n.mrk) - } - } - else if (all(is.character(arg)) && length(grep("seq", arg)) == length(arg)) - { - if (length(input.obj$chrom) == 1 && input.obj$chrom == 0) - stop("There is no chromosome information in ", deparse(substitute(input.obj))) - seq.num1 <- as.integer(which(!is.na(match(input.obj$chrom, gsub("[^0-9]", "", arg))))) - if(realkeep) {seq.num = intersect(seq.num1, seq.num)} else {seq.num = seq.num1} - chrom <- input.obj$chrom[seq.num] - if (length(input.obj$genome.pos) > 2) - genome.pos <- input.obj$genome.pos[seq.num] - } - else if (all(is.character(arg)) && (length(arg) == length(arg %in% input.obj$mrk.names))) - { - seq.num1 <- as.integer(match(arg, input.obj$mrk.names)) - if(realkeep) { - seq.num = intersect(seq.num1, seq.num) - } else { - seq.num = seq.num1 - } - chrom <- input.obj$chrom[seq.num] - if (length(input.obj$genome.pos) > 2) - genome.pos <- input.obj$genome.pos[seq.num] - } - else if (is.vector(arg) && all(is.numeric(arg))) - { - seq.num1 <- as.integer(arg) - if(realkeep) {seq.num = intersect(seq.num1, seq.num)} else {seq.num = seq.num1} - chrom <- input.obj$chrom[seq.num] - if (length(input.obj$genome.pos) > 2) - genome.pos <- input.obj$genome.pos[seq.num] - } - else stop("Invalid argument to select markers") - if (is.null(data.name)) - data.name <- as.character(sys.call())[2] - } - if (inherits(input.obj, "mappoly.unique.seq")) - { - if(!is.null(arg)) - warning("Ignoring argument 'arg' and using the unique sequence instead.") - return(input.obj$unique.seq) - } - if (inherits(input.obj, "mappoly.chitest.seq")) - { - if(!is.null(arg)) - warning("Ignoring argument 'arg' and using chi-square filtered markers instead.") - tmp <- make_seq_mappoly(get(input.obj$data.name, pos = 1), - arg = input.obj$keep, - info.parent = info.parent, - data.name = input.obj$data.name) - tmp$chisq.pval.thres <- input.obj$chisq.pval.thres - tmp$chisq.pval <- get(input.obj$data.name, pos = 1)$chisq.pval[input.obj$keep] - return(tmp) - } - if (inherits(input.obj, "mappoly.group")) - { - chisq.pval <- input.obj$chisq.pval - chisq.pval.thres <- input.obj$chisq.pval.thres - lgs.idx <- which(input.obj$groups.snp %in% arg) - seq.num.group = as.numeric(names(input.obj$groups.snp)[lgs.idx]) - - if (!is.null(genomic.info) && is.numeric(genomic.info)){ - seqs = colnames(input.obj$seq.vs.grouped.snp)[genomic.info] - } else { - if(realkeep) seq.num = intersect(seq.num.group, seq.num) - else seq.num = seq.num.group - } - data.name <- input.obj$data.name - input.obj <- get(data.name, pos = 1) - if (!is.null(genomic.info)){ - seq.num.seq = match(input.obj$mrk.names[(input.obj$chrom %in% seqs)], input.obj$mrk.names) - seq.num1 = intersect(seq.num.group, seq.num.seq) - if(realkeep) seq.num = intersect(seq.num1, seq.num) - else seq.num = seq.num1 - } - if(!all(is.na(input.obj$chrom)) && !all(is.na(input.obj$genome.pos))) - { - chrom <- input.obj$chrom[seq.num] - genome.pos <- input.obj$genome.pos[seq.num] - } - else - chrom <- genome.pos <- NULL - } - if (inherits(input.obj, "mappoly.pcmap") | inherits(input.obj, "mappoly.pcmap3d" )) - { - if(!is.null(arg)) - warning("Ignoring argument 'arg' and using the MDS order instead.") - return(make_seq_mappoly(input.obj = get(input.obj$data.name, pos = 1), - arg = as.character(input.obj$locimap$locus), - info.parent = info.parent, - data.name = input.obj$data.name)) - } - if (inherits(input.obj, "mappoly.geno.ord")) - { - if(!is.null(arg)) - warning("Ignoring argument 'arg' and using the genome order instead.") - return(make_seq_mappoly(get(input.obj$data.name, pos = 1), - arg = as.character(rownames(input.obj$ord)), - info.parent = info.parent, - data.name = input.obj$data.name)) - } - if (inherits(input.obj, "mappoly.edit.order")) - { - if(!is.null(arg)) - warning("Ignoring argument 'arg' and using the edited sequence order instead.") - return(make_seq_mappoly(get(input.obj$data.name, pos = 1), - arg = input.obj$edited_order, - data.name = input.obj$data.name)) - } - dp1 <- input.obj$dosage.p1[seq.num] - dp2 <- input.obj$dosage.p2[seq.num] - if(info.parent == "p1") - id <- dp2 == 0 | dp2 == input.obj$ploidy - else if(info.parent == "p2") - id <- dp1 == 0 | dp1 == input.obj$ploidy - else - id <- seq_along(seq.num) - structure(list(ploidy = input.obj$ploidy, - seq.num = seq.num[id], - seq.mrk.names = input.obj$mrk.names[seq.num][id], - seq.dose.p1 = input.obj$dosage.p1[seq.num][id], - seq.dose.p2 = input.obj$dosage.p2[seq.num][id], - seq.phases = -1, - seq.rf = -1, - loglike = -1, - chrom = chrom[id], - genome.pos = genome.pos[id], - data.name = data.name, - twopt = -1, - chisq.pval = chisq.pval, - chisq.pval.thres = chisq.pval.thres), - class = "mappoly.sequence") -} - -#' @rdname make_seq_mappoly -#' @export -print.mappoly.sequence <- function(x, ...) { - cat("This is an object of class 'mappoly.sequence'\n") - if (x$loglike == -1) { - cat(" ------------------------\n Parameters not estimated\n ------------------------\n") - } - n.mrk <- length(x$seq.num) - cat(" Ploidy level: ", x$ploidy, "\n") - cat(" No. individuals: ", get(x$data.name)$n.ind, "\n") - cat(" No. markers: ", length(x$seq.num), "\n") - w <- table(x$chrom) - if (all(is.null(x$chrom)) || all(is.na(x$chrom))) - cat("\n No. markers per sequence: not available") - else { - cat("\n ----------\n No. markers per sequence:\n") - print(data.frame(chrom = paste0(" ", names(w)), No.mrk = as.numeric(w)), row.names = FALSE) - } - cat("\n ----------\n No. of markers per dosage in both parents:\n") - freq <- table(paste(get(x$data.name)$dosage.p1[x$seq.num], get(x$data.name)$dosage.p2[x$seq.num], sep = "-")) - d.temp <- matrix(unlist(strsplit(names(freq), "-")), ncol = 2, byrow = TRUE) - print(data.frame(dP1 = paste0(" ", d.temp[, 1]), dP2 = d.temp[, 2], freq = as.numeric(freq)), row.names = FALSE) - if (x$loglike != -1) { - cat("\n ---------------------------------------------\n") - cat("\n log-likelihood:\t", x$loglike) - cat("\n rec. fraction:\t", round(imf_h(x$seq.rf), 1)) - cat("\n\n") - M <- matrix("|", n.mrk, x$ploidy * 2) - for (i in 1:n.mrk) { - if (all(x$seq.phases$Q[[i]] != 0)) - M[i, c(x$seq.phases$P[[i]], x$seq.phases$Q[[i]] + x$ploidy)] <- "o" else M[i, x$seq.phases$P[[i]]] <- "o" - } - M <- cbind(get(x$data.name)$mrk.names[x$seq.num], M) - format(apply(M, 1, function(y) cat(c("\t", y[1], "\t", y[2:(x$ploidy + 1)], rep(" ", 4), y[(x$ploidy + 2):(x$ploidy * 2 + 1)], "\n"), collapse = ""))) - } -} - -#' @rdname make_seq_mappoly -#' @export -#' @importFrom graphics barplot layout mtext image legend -#' @importFrom grDevices colorRampPalette -plot.mappoly.sequence <- function(x, ...) -{ - oldpar <- par(mar = c(5,4,1,2)) - on.exit(par(oldpar)) - ploidy <- x$ploidy - freq <- table(paste(x$seq.dose.p1, x$seq.dose.p2, sep = "-")) - d.temp <- matrix(unlist(strsplit(names(freq), "-")), ncol = 2, byrow = TRUE) - type <- apply(d.temp, 1, function(x,ploidy) paste0(sort(abs(abs(as.numeric(x)-(ploidy/2))-(ploidy/2))), collapse = ""), ploidy = x$ploidy) - type.names <- names(table(type)) - mrk.dist <- as.numeric(freq) - names(mrk.dist) <- apply(d.temp, 1 , paste, collapse = "-") - #w <- c("#FFFFFF", "#F0F0F0", "#D9D9D9", "#BDBDBD", "#969696", - # "#737373", "#525252", "#252525", "#000000") - #pal <- colorRampPalette(w)(length(type.names)) - layout(matrix(c(1,1,1,2,3,3,6,4,5), 3, 3), widths = c(1.2,3,.5), heights = c(1.5,4.5,.5)) - barplot(mrk.dist, las = 2, #col = pal[match(type, type.names)], - xlab = "Number of markers", - ylab = "Dosage combination", horiz = TRUE) - pval <- x$chisq.pval[x$seq.mrk.names] - if(is.null(x$chisq.pval)) - { - plot(0, 0, axes = FALSE, xlab = "", ylab = "", type = "n") - text(x = 0, y = 0, labels = "No segregation test", cex = 2) - } else{ - par(mar = c(1,1,1,2)) - par(xaxs = "i") - plot(log10(pval), axes = FALSE, xlab = "", ylab = "", pch = 16, - col = rgb(red = 0.25, green = 0.64, blue = 0.86, alpha = 0.3)) - axis(4, line = 1) - mtext(text = bquote(log[10](P)), side = 4, line = 4, cex = .7) - } - par(mar = c(5,1,0,2)) - pal <- c("black", colorRampPalette(c("#D73027", "#F46D43", "#FDAE61", "#FEE090", - "#FFFFBF", "#E0F3F8", "#ABD9E9", "#74ADD1", - "#4575B4"))(x$ploidy + 1)) - names(pal) <- c(-1:x$ploidy) - M <- as.matrix(get(x$data.name, pos = 1)$geno.dose[x$seq.mrk.names,]) - M[M == x$ploidy+1] <- -1 - image(x = 1:nrow(M), z = M, axes = FALSE, xlab = "", - col = pal[as.character(sort(unique(as.vector(M))))], useRaster = TRUE) - mtext(text = "Markers", side = 1, line = .4) - mtext(text = "Individuals", side = 2, line = .2) - par(mar = c(0,0,0,0)) - plot(0:10,0:10, type = "n", axes = FALSE, xlab = "", ylab = "") - legend(0,10, - horiz = FALSE, - legend = c("missing", 0:x$ploidy), - pch = 22, - pt.cex = 3, - pt.bg = pal, pt.lwd = 0, - bty = "n", xpd = TRUE) - par(mfrow = c(1,1)) -} +#' Create a Sequence of Markers +#' +#' Constructs a sequence of markers based on an object belonging to various specified classes. This +#' function is versatile, supporting multiple input types and configurations for generating marker sequences. +#' +#' @param input.obj An object belonging to one of the specified classes: \code{mappoly.data}, +#' \code{mappoly.map}, \code{mappoly.sequence}, \code{mappoly.group}, \code{mappoly.unique.seq}, +#' \code{mappoly.pcmap}, \code{mappoly.pcmap3d}, \code{mappoly.geno.ord}, or \code{mappoly.edit.order}. +#' +#' @param arg Specifies the markers to include in the sequence, accepting several formats: a string 'all' for all +#' markers; a string or vector of strings 'seqx' where x is the sequence number (0 for unassigned markers); a +#' vector of integers indicating specific markers; or a vector of integers representing linkage group numbers if +#' \code{input.obj} is of class \code{mappoly.group}. For certain classes (\code{mappoly.pcmap}, \code{mappoly.pcmap3d}, +#' \code{mappoly.unique.seq}, or \code{mappoly.geno.ord}), \code{arg} can be \code{NULL}. +#' +#' @param data.name Name of the \code{mappoly.data} class object. +#' +#' @param info.parent Selection criteria based on parental information: \code{'all'} for all dosage combinations, +#' \code{'P1'} for markers informative in parent 1, or \code{'P2'} for markers informative in parent 2. Default +#' is \code{'all'}. +#' +#' @param genomic.info Optional and applicable only to \code{mappoly.group} objects. Specifies the use of genomic +#' information in sequence creation. With \code{NULL} (default), all markers defined by the grouping function are +#' included. Numeric values indicate the use of specific sequences from genomic information, aiming to match the +#' maximum number of markers with the group. Supports single values or vectors for multiple sequence consideration. +#' +#' @param x An object of class \code{mappoly.sequence}. +#' +#' @param ... Currently ignored. +#' +#' @return Returns an object of class \code{mappoly.sequence}, comprising: +#' \itemize{ +#' \item{\code{seq.num}}{Ordered vector of marker indices according to the input.} +#' \item{\code{seq.phases}}{List of linkage phases between markers; \code{-1} for undefined phases.} +#' \item{\code{seq.rf}}{Vector of recombination frequencies; \code{-1} for unestimated frequencies.} +#' \item{\code{loglike}}{Log-likelihood of the linkage map.} +#' \item{\code{data.name}}{Name of the \code{mappoly.data} object with raw data.} +#' \item{\code{twopt}}{Name of the \code{mappoly.twopt} object with 2-point analyses; \code{-1} if not computed.} +#' } +#' +#' @examples +#' all.mrk <- make_seq_mappoly(hexafake, 'all') +#' seq1.mrk <- make_seq_mappoly(hexafake, 'seq1') +#' plot(seq1.mrk) +#' some.mrk.pos <- c(1,4,28,32,45) +#' some.mrk.1 <- make_seq_mappoly(hexafake, some.mrk.pos) +#' plot(some.mrk.1) +#' +#' @author Marcelo Mollinari \email{mmollin@ncsu.edu}, with modifications by Gabriel Gesteira +#' \email{gdesiqu@ncsu.edu} +#' +#' @references +#' Mollinari, M., and Garcia, A. A. F. (2019). Linkage analysis and haplotype phasing in experimental +#' autopolyploid populations with high ploidy level using hidden Markov models. _G3: Genes|Genomes|Genetics_, +#' \doi{10.1534/g3.119.400378}. +#' +#' @export + +make_seq_mappoly <- function(input.obj, + arg = NULL, + data.name = NULL, + info.parent = c('all', 'p1', 'p2'), + genomic.info = NULL) { + ## checking for correct object + input_classes <- c("mappoly.data", "mappoly.map", "mappoly.sequence", + "mappoly.unique.seq", "mappoly.pcmap", "mappoly.pcmap3d", + "mappoly.group", "mappoly.chitest.seq", "mappoly.geno.ord", + "mappoly.edit.order") + if (!inherits(input.obj, input_classes)) { + stop("invalid input object.", call. = FALSE) + } + ## if input object is a map, call 'make_seq_mappoly' recursively + info.parent <- match.arg(info.parent) + if(inherits (input.obj, "mappoly.map")) + return(make_seq_mappoly(get(input.obj$info$data.name, pos = 1), + arg = input.obj$info$mrk.names, + info.parent = info.parent, + data.name = input.obj$info$data.name)) + if(inherits (input.obj, "mappoly.sequence")){ + if(is.null(arg)) + arg = input.obj$seq.mrk.names + if(!is.character(arg)) + stop("provide marker names when using 'mappoly.sequence' as input object.", + call. = FALSE) + return(make_seq_mappoly(get(input.obj$data.name, pos = 1), + arg = arg, + info.parent = info.parent, + data.name = input.obj$data.name)) + } + ## checking for argument to make a sequence + if (is.null(arg) && !inherits(input.obj, "mappoly.chitest.seq") && + !inherits(input.obj, "mappoly.unique.seq") && + !inherits(input.obj, "mappoly.pcmap") && + !inherits(input.obj, "mappoly.pcmap3d") && + !inherits(input.obj, "mappoly.geno.ord") && + !inherits(input.obj, "mappoly.edit.order")) { + stop("argument 'arg' expected.") + } + ## Variables defined to block removing redundant markers + realkeep = FALSE + tokeep = FALSE + # ## Old code to handle redundant markers + # if ((!is.null(input.obj$kept))){ + # tokeep = input.obj$kept + # realkeep = TRUE + # seq.num = match(tokeep,input.obj$mrk.names) + # } + if (inherits(input.obj, "mappoly.data")) + { + chisq.pval <- input.obj$chisq.pval + chisq.pval.thres <- NULL + ## gathering sequence data + chrom <- genome.pos <- NULL + if (any(!is.na(input.obj$chrom))) + chrom <- input.obj$chrom + if (any(!is.na(input.obj$genome.pos))) + genome.pos <- input.obj$genome.pos + + ## make sequence with all markers + if (length(arg) == 1 && arg == "all") + { + if (realkeep) { + seq.num = match(tokeep,input.obj$mrk.names) + } else { + seq.num = as.integer(1:input.obj$n.mrk) + } + } + else if (all(is.character(arg)) && length(grep("seq", arg)) == length(arg)) + { + if (length(input.obj$chrom) == 1 && input.obj$chrom == 0) + stop("There is no chromosome information in ", deparse(substitute(input.obj))) + seq.num1 <- as.integer(which(!is.na(match(input.obj$chrom, gsub("[^0-9]", "", arg))))) + if(realkeep) {seq.num = intersect(seq.num1, seq.num)} else {seq.num = seq.num1} + chrom <- input.obj$chrom[seq.num] + if (length(input.obj$genome.pos) > 2) + genome.pos <- input.obj$genome.pos[seq.num] + } + else if (all(is.character(arg)) && (length(arg) == length(arg %in% input.obj$mrk.names))) + { + seq.num1 <- as.integer(match(arg, input.obj$mrk.names)) + if(realkeep) { + seq.num = intersect(seq.num1, seq.num) + } else { + seq.num = seq.num1 + } + chrom <- input.obj$chrom[seq.num] + if (length(input.obj$genome.pos) > 2) + genome.pos <- input.obj$genome.pos[seq.num] + } + else if (is.vector(arg) && all(is.numeric(arg))) + { + seq.num1 <- as.integer(arg) + if(realkeep) {seq.num = intersect(seq.num1, seq.num)} else {seq.num = seq.num1} + chrom <- input.obj$chrom[seq.num] + if (length(input.obj$genome.pos) > 2) + genome.pos <- input.obj$genome.pos[seq.num] + } + else stop("Invalid argument to select markers") + if (is.null(data.name)) + data.name <- as.character(sys.call())[2] + } + if (inherits(input.obj, "mappoly.unique.seq")) + { + if(!is.null(arg)) + warning("Ignoring argument 'arg' and using the unique sequence instead.") + return(input.obj$unique.seq) + } + if (inherits(input.obj, "mappoly.chitest.seq")) + { + if(!is.null(arg)) + warning("Ignoring argument 'arg' and using chi-square filtered markers instead.") + tmp <- make_seq_mappoly(get(input.obj$data.name, pos = 1), + arg = input.obj$keep, + info.parent = info.parent, + data.name = input.obj$data.name) + tmp$chisq.pval.thres <- input.obj$chisq.pval.thres + tmp$chisq.pval <- get(input.obj$data.name, pos = 1)$chisq.pval[input.obj$keep] + return(tmp) + } + if (inherits(input.obj, "mappoly.group")) + { + chisq.pval <- input.obj$chisq.pval + chisq.pval.thres <- input.obj$chisq.pval.thres + lgs.idx <- which(input.obj$groups.snp %in% arg) + seq.num.group = as.numeric(names(input.obj$groups.snp)[lgs.idx]) + + if (!is.null(genomic.info) && is.numeric(genomic.info)){ + seqs = colnames(input.obj$seq.vs.grouped.snp)[genomic.info] + } else { + if(realkeep) seq.num = intersect(seq.num.group, seq.num) + else seq.num = seq.num.group + } + data.name <- input.obj$data.name + input.obj <- get(data.name, pos = 1) + if (!is.null(genomic.info)){ + seq.num.seq = match(input.obj$mrk.names[(input.obj$chrom %in% seqs)], input.obj$mrk.names) + seq.num1 = intersect(seq.num.group, seq.num.seq) + if(realkeep) seq.num = intersect(seq.num1, seq.num) + else seq.num = seq.num1 + } + if(!all(is.na(input.obj$chrom)) && !all(is.na(input.obj$genome.pos))) + { + chrom <- input.obj$chrom[seq.num] + genome.pos <- input.obj$genome.pos[seq.num] + } + else + chrom <- genome.pos <- NULL + } + if (inherits(input.obj, "mappoly.pcmap") | inherits(input.obj, "mappoly.pcmap3d" )) + { + if(!is.null(arg)) + warning("Ignoring argument 'arg' and using the MDS order instead.") + return(make_seq_mappoly(input.obj = get(input.obj$data.name, pos = 1), + arg = as.character(input.obj$locimap$locus), + info.parent = info.parent, + data.name = input.obj$data.name)) + } + if (inherits(input.obj, "mappoly.geno.ord")) + { + if(!is.null(arg)) + warning("Ignoring argument 'arg' and using the genome order instead.") + return(make_seq_mappoly(get(input.obj$data.name, pos = 1), + arg = as.character(rownames(input.obj$ord)), + info.parent = info.parent, + data.name = input.obj$data.name)) + } + if (inherits(input.obj, "mappoly.edit.order")) + { + if(!is.null(arg)) + warning("Ignoring argument 'arg' and using the edited sequence order instead.") + return(make_seq_mappoly(get(input.obj$data.name, pos = 1), + arg = input.obj$edited_order, + data.name = input.obj$data.name)) + } + dp1 <- input.obj$dosage.p1[seq.num] + dp2 <- input.obj$dosage.p2[seq.num] + if(info.parent == "p1") + id <- dp2 == 0 | dp2 == input.obj$ploidy + else if(info.parent == "p2") + id <- dp1 == 0 | dp1 == input.obj$ploidy + else + id <- seq_along(seq.num) + structure(list(ploidy = input.obj$ploidy, + seq.num = seq.num[id], + seq.mrk.names = input.obj$mrk.names[seq.num][id], + seq.dose.p1 = input.obj$dosage.p1[seq.num][id], + seq.dose.p2 = input.obj$dosage.p2[seq.num][id], + seq.phases = -1, + seq.rf = -1, + loglike = -1, + chrom = chrom[id], + genome.pos = genome.pos[id], + data.name = data.name, + twopt = -1, + chisq.pval = chisq.pval, + chisq.pval.thres = chisq.pval.thres), + class = "mappoly.sequence") +} + +#' @rdname make_seq_mappoly +#' @export +print.mappoly.sequence <- function(x, ...) { + cat("This is an object of class 'mappoly.sequence'\n") + if (x$loglike == -1) { + cat(" ------------------------\n Parameters not estimated\n ------------------------\n") + } + n.mrk <- length(x$seq.num) + cat(" Ploidy level: ", x$ploidy, "\n") + cat(" No. individuals: ", get(x$data.name)$n.ind, "\n") + cat(" No. markers: ", length(x$seq.num), "\n") + w <- table(x$chrom) + if (all(is.null(x$chrom)) || all(is.na(x$chrom))) + cat("\n No. markers per sequence: not available") + else { + cat("\n ----------\n No. markers per sequence:\n") + print(data.frame(chrom = paste0(" ", names(w)), No.mrk = as.numeric(w)), row.names = FALSE) + } + cat("\n ----------\n No. of markers per dosage in both parents:\n") + freq <- table(paste(get(x$data.name)$dosage.p1[x$seq.num], get(x$data.name)$dosage.p2[x$seq.num], sep = "-")) + d.temp <- matrix(unlist(strsplit(names(freq), "-")), ncol = 2, byrow = TRUE) + print(data.frame(dP1 = paste0(" ", d.temp[, 1]), dP2 = d.temp[, 2], freq = as.numeric(freq)), row.names = FALSE) + if (x$loglike != -1) { + cat("\n ---------------------------------------------\n") + cat("\n log-likelihood:\t", x$loglike) + cat("\n rec. fraction:\t", round(imf_h(x$seq.rf), 1)) + cat("\n\n") + M <- matrix("|", n.mrk, x$ploidy * 2) + for (i in 1:n.mrk) { + if (all(x$seq.phases$Q[[i]] != 0)) + M[i, c(x$seq.phases$P[[i]], x$seq.phases$Q[[i]] + x$ploidy)] <- "o" else M[i, x$seq.phases$P[[i]]] <- "o" + } + M <- cbind(get(x$data.name)$mrk.names[x$seq.num], M) + format(apply(M, 1, function(y) cat(c("\t", y[1], "\t", y[2:(x$ploidy + 1)], rep(" ", 4), y[(x$ploidy + 2):(x$ploidy * 2 + 1)], "\n"), collapse = ""))) + } +} + +#' @rdname make_seq_mappoly +#' @export +#' @importFrom graphics barplot layout mtext image legend +#' @importFrom grDevices colorRampPalette +plot.mappoly.sequence <- function(x, ...) +{ + oldpar <- par(mar = c(5,4,1,2)) + on.exit(par(oldpar)) + ploidy <- x$ploidy + freq <- table(paste(x$seq.dose.p1, x$seq.dose.p2, sep = "-")) + d.temp <- matrix(unlist(strsplit(names(freq), "-")), ncol = 2, byrow = TRUE) + type <- apply(d.temp, 1, function(x,ploidy) paste0(sort(abs(abs(as.numeric(x)-(ploidy/2))-(ploidy/2))), collapse = ""), ploidy = x$ploidy) + type.names <- names(table(type)) + mrk.dist <- as.numeric(freq) + names(mrk.dist) <- apply(d.temp, 1 , paste, collapse = "-") + #w <- c("#FFFFFF", "#F0F0F0", "#D9D9D9", "#BDBDBD", "#969696", + # "#737373", "#525252", "#252525", "#000000") + #pal <- colorRampPalette(w)(length(type.names)) + layout(matrix(c(1,1,1,2,3,3,6,4,5), 3, 3), widths = c(1.2,3,.5), heights = c(1.5,4.5,.5)) + barplot(mrk.dist, las = 2, #col = pal[match(type, type.names)], + xlab = "Number of markers", + ylab = "Dosage combination", horiz = TRUE) + pval <- x$chisq.pval[x$seq.mrk.names] + if(is.null(x$chisq.pval)) + { + plot(0, 0, axes = FALSE, xlab = "", ylab = "", type = "n") + text(x = 0, y = 0, labels = "No segregation test", cex = 2) + } else{ + par(mar = c(1,1,1,2)) + par(xaxs = "i") + plot(log10(pval), axes = FALSE, xlab = "", ylab = "", pch = 16, + col = rgb(red = 0.25, green = 0.64, blue = 0.86, alpha = 0.3)) + axis(4, line = 1) + mtext(text = bquote(log[10](P)), side = 4, line = 4, cex = .7) + } + par(mar = c(5,1,0,2)) + pal <- c("black", colorRampPalette(c("#D73027", "#F46D43", "#FDAE61", "#FEE090", + "#FFFFBF", "#E0F3F8", "#ABD9E9", "#74ADD1", + "#4575B4"))(x$ploidy + 1)) + names(pal) <- c(-1:x$ploidy) + M <- as.matrix(get(x$data.name, pos = 1)$geno.dose[x$seq.mrk.names,]) + M[M == x$ploidy+1] <- -1 + image(x = 1:nrow(M), z = M, axes = FALSE, xlab = "", + col = pal[as.character(sort(unique(as.vector(M))))], useRaster = TRUE) + mtext(text = "Markers", side = 1, line = .4) + mtext(text = "Individuals", side = 2, line = .2) + par(mar = c(0,0,0,0)) + plot(0:10,0:10, type = "n", axes = FALSE, xlab = "", ylab = "") + legend(0,10, + horiz = FALSE, + legend = c("missing", 0:x$ploidy), + pch = 22, + pt.cex = 3, + pt.bg = pal, pt.lwd = 0, + bty = "n", xpd = TRUE) + par(mfrow = c(1,1)) +} diff --git a/R/pairwise_rf.R b/R/pairwise_rf.R index 37a6a7e2..8d877709 100755 --- a/R/pairwise_rf.R +++ b/R/pairwise_rf.R @@ -40,25 +40,15 @@ #' @param ll will return log-likelihood instead of LOD scores. #' (for internal use) #' -#' @return An object of class \code{mappoly.twopt} which -#' is a list containing the following components: -#' \item{data.name}{name of the object of class -#' \code{mappoly.data} with the raw data} -#' \item{n.mrk}{number of markers in the sequence} -#' \item{seq.num}{a \code{vector} containing the (ordered) indices -#' of markers in the sequence, according to the input file} -#' \item{pairwise}{a list of size -#' \code{choose(length(input.seq$seq.num), 2)}, each of them containing a -#' matrix where the name of the rows have the form x-y, where x and y indicate -#' how many homologues share the same allelic variant in parents P and Q, -#' respectively (see Mollinari and Garcia, 2019 for notation). The first -#' column indicates the LOD Score in relation to the most likely linkage -#' phase configuration. The second column shows the estimated recombination -#' fraction for each configuration, and the third indicates the LOD Score -#' comparing the likelihood under no linkage (r = 0.5) with the estimated -#' recombination fraction (evidence of linkage).} -#' \code{chisq.pval.thres}{threshold used to perform the segregation tests} -#' \code{chisq.pval}{p-values associated with the performed segregation tests} +#' @return An object of class \code{mappoly.twopt} which is a list containing the following components: +#' \describe{ +#' \item{\code{data.name}}{Name of the object of class \code{mappoly.data} containing the raw data.} +#' \item{\code{n.mrk}}{Number of markers in the sequence.} +#' \item{\code{seq.num}}{A \code{vector} containing the (ordered) indices of markers in the sequence, according to the input file.} +#' \item{\code{pairwise}}{A list of size \code{choose(length(input.seq$seq.num), 2)}, where each element is a matrix. The rows are named in the format x-y, where x and y indicate how many homologues share the same allelic variant in parents P and Q, respectively (see Mollinari and Garcia, 2019 for notation). The first column indicates the LOD Score for the most likely linkage phase configuration. The second column shows the estimated recombination fraction for each configuration, and the third column indicates the LOD Score for comparing the likelihood under no linkage (r = 0.5) with the estimated recombination fraction (evidence of linkage).} +#' \item{\code{chisq.pval.thres}}{Threshold used to perform the segregation tests.} +#' \item{\code{chisq.pval}}{P-values associated with the performed segregation tests.} +#' } #' #' @examples #' ## Tetraploid example (first 50 markers) @@ -260,10 +250,7 @@ est_pairwise_rf <- function(input.seq, count.cache = NULL, } #' Wrapper function to discrete-based pairwise two-point estimation in C++ -#' -#' @param void internal function to be documented #' @keywords internal -#' @export paralell_pairwise_discrete <- function(mrk.pairs, input.seq, geno, @@ -287,10 +274,7 @@ paralell_pairwise_discrete <- function(mrk.pairs, } #' Wrapper function to probability-based pairwise two-point estimation in C++ -#' -#' @param void internal function to be documented #' @keywords internal -#' @export paralell_pairwise_probability <- function(mrk.pairs, input.seq, geno, @@ -315,8 +299,6 @@ paralell_pairwise_probability <- function(mrk.pairs, } #' Format results from pairwise two-point estimation in C++ -#' -#' @param void internal function to be documented #' @keywords internal format_rf <- function(res) { x <- res @@ -492,11 +474,8 @@ est_pairwise_rf2 <- function(input.seq, } #' Wrapper function to discrete-based pairwise two-point estimation in C++ -#' -#' @param void internal function to be documented #' @keywords internal #' @importFrom RcppParallel RcppParallelLibs -#' @export paralell_pairwise_discrete_rcpp <- function(mrk.pairs, m, geno, diff --git a/R/plot_map_list.R b/R/plot_map_list.R index 30edc9d7..61ebc7b7 100644 --- a/R/plot_map_list.R +++ b/R/plot_map_list.R @@ -119,8 +119,6 @@ extract_map <- function(input.map, phase.config = "best") } #' plot a single linkage group with no phase -#' -#' @param void internal function to be documented #' @keywords internal plot_one_map <- function(x, i = 0, horiz = FALSE, col = "lightgray") { diff --git a/R/prior_dist_hmm.R b/R/prior_dist_hmm.R index fa760a96..3d23aa9b 100755 --- a/R/prior_dist_hmm.R +++ b/R/prior_dist_hmm.R @@ -1,7 +1,6 @@ #' Estimate genetic map using as input the probability distribution of #' genotypes (wrapper function to C++) #' -#' @param void internal function to be documented #' @keywords internal #' poly_hmm_est <- diff --git a/R/read_mappoly_csv.R b/R/read_mappoly_csv.R index 3c0932dd..cc05945f 100644 --- a/R/read_mappoly_csv.R +++ b/R/read_mappoly_csv.R @@ -91,10 +91,7 @@ read_geno_csv <- function(file.in, ploidy, filter.non.conforming = TRUE, elim.re } #' Conversion of data.frame to mappoly.data -#' -#' @param void internal function to be documented #' @keywords internal -#' @export table_to_mappoly <- function(dat, ploidy, filter.non.conforming = TRUE, elim.redundant = TRUE, verbose = TRUE){ ## Removing markers with missing data points for parents dat = dat[which(!is.na(dat[,2,drop = TRUE]) & !is.na(dat[,3,drop = TRUE])),] diff --git a/R/reest_map_error.R b/R/reest_map_error.R index 0a4d7c27..71c53c29 100755 --- a/R/reest_map_error.R +++ b/R/reest_map_error.R @@ -3,8 +3,6 @@ #' If \code{restricted = TRUE}, it restricts the prior to the #' possible classes under Mendelian, non double-reduced segregation #' given dosage of the parents -#' -#' @param void internal function to be documented #' @keywords internal genotyping_global_error <- function(x, ploidy, restricted = TRUE, error = 0.01, th.prob = 0.95) { diff --git a/R/rf_list_to_matrix.R b/R/rf_list_to_matrix.R index 3172b023..ade189e1 100755 --- a/R/rf_list_to_matrix.R +++ b/R/rf_list_to_matrix.R @@ -294,8 +294,6 @@ plot.mappoly.rf.matrix <- function(x, type = c("rf", "lod"), ord = NULL, rem = N #' Select rf and lod based on thresholds -#' -#' @param void internal function to be documented #' @keywords internal select_rf <- function(x, thresh.LOD.ph, thresh.LOD.rf, thresh.rf, shared.alleles = FALSE) { diff --git a/R/simulation_utils.R b/R/simulation_utils.R index 3669b85a..5c9cf84d 100755 --- a/R/simulation_utils.R +++ b/R/simulation_utils.R @@ -4,8 +4,6 @@ #' under random chromosome segregation #' with one informative parent. This function is not to be #' directly called by the user -#' -#' @param void internal function to be documented #' @keywords internal sim_cross_one_informative_parent <- function(ploidy, n.mrk, @@ -81,8 +79,6 @@ sim_cross_one_informative_parent <- function(ploidy, } #' Simulate mapping population (tow parents) -#' -#' @param void internal function to be documented #' @keywords internal sim_cross_two_informative_parents <- function(ploidy, n.mrk, @@ -114,8 +110,6 @@ sim_cross_two_informative_parents <- function(ploidy, #' This function draws the parental map (including the linkage #' phase configuration) in a pdf output. This function is not to #' be directly called by the user -#' -#' @param void internal function to be documented #' @importFrom grDevices pdf dev.off #' @keywords internal draw_cross <- function(ploidy, diff --git a/R/single_map_hmm.R b/R/single_map_hmm.R index 99069564..8d7649ce 100755 --- a/R/single_map_hmm.R +++ b/R/single_map_hmm.R @@ -1,35 +1,6 @@ #' Multipoint analysis using Hidden Markov Models (single phase) -#' -#' @param void internal function to be documented #' @keywords internal -#' @examples -#' \donttest{ -#' seq.all.mrk <- make_seq_mappoly(hexafake, 1:20) -#' id <- get_genomic_order(seq.all.mrk) -#' s.go <- make_seq_mappoly(id) -#' ## Using the 5 contiguous markers -#' seq5 <- make_seq_mappoly(hexafake, s.go$seq.mrk.names[6:10]) -#' twopt <- est_pairwise_rf(seq5) -#' l5 <- ls_linkage_phases(input.seq = seq5, thres = 2, twopt = twopt) -#' plot(l5) -#' -#' ## Evaluating 2 linkage phase configurations using HMM -#' maps1 <- vector("list", length(l5$config.to.test)) -#' for(i in 1:length(maps1)) -#' maps1[[i]] <- est_rf_hmm_single_phase(seq5, l5$config.to.test[[i]], -#' tol = 10e-3, -#' high.prec = FALSE) -#' (best <- which.max(sapply(maps1, function(x) x$loglike))) -#' dist1 <- round(cumsum(c(0, imf_h(maps1[[best]]$seq.rf))),2) -#' -#' ## Same thing using automatic search -#' maps2 <- est_rf_hmm(input.seq = seq5, twopt = twopt, thres = 2, -#' verbose = TRUE, tol = 10e-3, high.prec = FALSE) -#' plot(maps2) -#' dist1 -#' } #' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} -#' @export est_rf_hmm_single_phase est_rf_hmm_single_phase <- function(input.seq, input.ph.single, rf.temp = NULL, diff --git a/R/single_paprent_single_phase_hmm.R b/R/single_paprent_single_phase_hmm.R index 801cfc50..a79fa2e2 100644 --- a/R/single_paprent_single_phase_hmm.R +++ b/R/single_paprent_single_phase_hmm.R @@ -1,10 +1,5 @@ #' Multilocus analysis using Hidden Markov Models (single parent, single phase) -#' -#' @param void internal function to be documented #' @keywords internal -#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} -#' @export est_rf_hmm_single_phase_single_parent -#' est_rf_hmm_single_phase_single_parent <- function(input.seq, input.ph.single, info.parent = 1, diff --git a/R/utils.R b/R/utils.R index b4369c1b..4f52af82 100755 --- a/R/utils.R +++ b/R/utils.R @@ -14,10 +14,7 @@ get_LOD <- function(x, sorted = TRUE) { } #' Get recombination fraction from a matrix -#' -#' @param void internal function to be documented #' @keywords internal -#' @export get_rf_from_mat <- function(M){ r <- numeric(nrow(M)-1) for(i in 1:(nrow(M)-1)){ @@ -27,20 +24,33 @@ get_rf_from_mat <- function(M){ } -#' Is it a probability dataset? +#' Check if Object is a Probability Dataset in MAPpoly +#' +#' Determines whether the specified object is a probability dataset +#' by checking for the existence of the 'geno' component within a +#' `"mappoly.data"` object. +#' +#' @param x An object of class `"mappoly.data"` +#' +#' @return A logical value: `TRUE` if the 'geno' component exists within `x`, +#' indicating it is a valid probability dataset for genetic analysis; `FALSE` +#' otherwise. #' -#' @param void internal function to be documented #' @keywords internal #' @export is.prob.data <- function(x){ + # Verify if 'x' is indeed an object of class 'mappoly.data' + if (!inherits(x, "mappoly.data")) { + stop("Input is not an object of class 'mappoly.data'") + } + + # Check for the existence of 'geno' within the 'mappoly.data' object exists('geno', where = x) } + #' Get the number of bivalent configurations -#' -#' @param void internal function to be documented #' @keywords internal -#' @export get_w_m <- function(ploidy){ if(ploidy%%2 != 0) stop("ploidy level should be an even number") if(ploidy <= 0) stop("ploidy level should be greater than zero") @@ -141,8 +151,6 @@ export_data_to_polymapR <- function(data.in) } #' Msg function -#' -#' @param void internal function to be documented #' @keywords internal #' @importFrom cli rule msg <- function(text, line = 1){ @@ -165,51 +173,54 @@ text_col <- function(x) { if (isTRUE(theme$dark)) crayon::white(x) else crayon::black(x) } -#' Map functions +#' Genetic Mapping Functions #' -#' @param void internal function to be documented -#' @keywords internal -#' @export -mf_k <- function(d) 0.5 * tanh(d/50) -#' -#' Map functions -#' -#' @param void internal function to be documented -#' @keywords internal -#' @export -mf_h <- function(d) 0.5 * (1 - exp(-d/50)) -#' Map functions +#' These functions facilitate the conversion between recombination fractions (r) and genetic distances (d) +#' using various mapping models. The functions starting with `mf_` convert recombination fractions to genetic distances, +#' while those starting with `imf_` convert genetic distances back into recombination fractions. #' -#' @param void internal function to be documented -#' @keywords internal -#' @export -mf_m <- function(d) sapply(d, function(a) min(a/100, 0.5)) -#' Map functions -#' -#' @param void internal function to be documented -#' @keywords internal +#' @name genetic-mapping-functions +#' @aliases mf_k mf_h mf_m imf_k imf_h imf_m +#' @usage mf_k(d) +#' @usage mf_h(d) +#' @usage mf_m(d) +#' @usage imf_k(r) +#' @usage imf_h(r) +#' @usage imf_m(r) +#' @param d Numeric or numeric vector, representing genetic distances in centiMorgans (cM) for direct functions (mf_k, mf_h, mf_m). +#' @param r Numeric or numeric vector, representing recombination fractions for inverse functions (imf_k, imf_h, imf_m). +#' @details +#' The `mf_` prefixed functions apply different models to convert recombination fractions into genetic distances: +#' \itemize{ +#' \item \code{mf_k}: Kosambi mapping function. +#' \item \code{mf_h}: Haldane mapping function. +#' \item \code{mf_m}: Morgan mapping function. +#'} +#' The `imf_` prefixed functions convert genetic distances back into recombination fractions: +#' \itemize{ +#' \item \code{imf_k}: Inverse Kosambi mapping function. +#' \item \code{imf_h}: Inverse Haldane mapping function. +#' \item \code{imf_m}: Inverse Morgan mapping function. +#'} +#' @references +#' Kosambi, D.D. (1944). The estimation of map distances from recombination values. Ann Eugen., 12, 172-175. +#' Haldane, J.B.S. (1919). The combination of linkage values, and the calculation of distances between the loci of linked factors. J Genet, 8, 299-309. +#' Morgan, T.H. (1911). Random segregation versus coupling in Mendelian inheritance. Science, 34(873), 384. +#' @keywords genetics #' @export +mf_k <- function(d) 0.5 * tanh(d / 50) +mf_h <- function(d) 0.5 * (1 - exp(-d / 50)) +mf_m <- function(d) sapply(d, function(a) min(a / 100, 0.5)) imf_k <- function(r) { r[r >= 0.5] <- 0.5 - 1e-14 50 * atanh(2 * r) } -#' Map functions -#' -#' @param void internal function to be documented -#' @keywords internal -#' @export imf_h <- function(r) { r[r >= 0.5] <- 0.5 - 1e-14 -50 * log(1 - 2 * r) } -#' Map functions -#' -#' @param void internal function to be documented -#' @keywords internal -#' @export imf_m <- function(r) sapply(r, function(a) min(a * 100, 50)) - #' Compare two polyploid haplotypes stored in list format #' #' @param ploidy ploidy level @@ -221,6 +232,7 @@ imf_m <- function(r) sapply(r, function(a) min(a * 100, 50)) #' that homolog. #' @keywords internal #' @export compare_haplotypes +#' compare_haplotypes <- function(ploidy, h1, h2) { I1 <- matrix(0, ploidy, length(h1)) I2 <- matrix(0, ploidy, length(h2)) @@ -279,11 +291,30 @@ plot_GIC <- function(hprobs, P = "P1", Q = "P2"){ return(invisible(DF)) } -#' Plot two overlapped haplotypes +#' Plot Two Overlapped Haplotypes +#' +#' This function plots two sets of haplotypes for comparison, allowing for visual +#' inspection of homologous allele patterns across two groups or conditions. +#' It is designed to handle and display genetic data for organisms with varying ploidy levels. +#' +#' @param ploidy Integer, specifying the ploidy level of the organism being represented. +#' @param hom.allele.p1 A list where each element represents the alleles for a marker in the first haplotype group, for 'p' parent. +#' @param hom.allele.q1 A list where each element represents the alleles for a marker in the first haplotype group, for 'q' parent. +#' @param hom.allele.p2 Optionally, a list where each element represents the alleles for a marker in the second haplotype group, for 'p' parent. +#' @param hom.allele.q2 Optionally, a list where each element represents the alleles for a marker in the second haplotype group, for 'q' parent. +#' +#' @details The function creates a graphical representation of haplotypes, where each marker's alleles are plotted +#' along a line for each parent ('p' and 'q'). If provided, the second set of haplotypes (for comparison) are overlaid +#' on the same plot. This allows for direct visual comparison of allele presence or absence across the two sets. +#' Different colors are used to distinguish between the first and second sets of haplotypes. +#' +#' The function uses several internal helper functions (`ph_list_to_matrix` and `ph_matrix_to_list`) to manipulate +#' haplotype data. These functions should correctly handle the conversion between list and matrix representations +#' of haplotype data. #' -#' @param void internal function to be documented +#' @return Invisible. The function primarily generates a plot for visual analysis and does not return any data. +#' @export #' @keywords internal -#' @export plot_compare_haplotypes plot_compare_haplotypes <- function(ploidy, hom.allele.p1, hom.allele.q1, hom.allele.p2 = NULL, hom.allele.q2 = NULL) { nmmrk <- names(hom.allele.p1) oldpar <- par(mar = c(5.1, 4.1, 4.1, 2.1)) @@ -336,10 +367,7 @@ plot_compare_haplotypes <- function(ploidy, hom.allele.p1, hom.allele.q1, hom.al #' Check if it is possible to estimate the recombination #' fraction between neighbor markers using two-point #' estimation -#' -#' @param void internal function to be documented #' @keywords internal -#' @export check_if_rf_is_possible <- function(input.seq){ dp <- abs(abs(input.seq$seq.dose.p1-(input.seq$ploidy/2))-(input.seq$ploidy/2)) dq <- abs(abs(input.seq$seq.dose.p2-(input.seq$ploidy/2))-(input.seq$ploidy/2)) @@ -354,10 +382,7 @@ check_if_rf_is_possible <- function(input.seq){ } #' N! combination -#' -#' @param void internal function to be documented #' @keywords internal -#' @export perm_tot <- function(v) { n <- length(v) result <- v @@ -377,10 +402,7 @@ perm_tot <- function(v) { } #' N!/2 combination -#' -#' @param void internal function to be documented #' @keywords internal -#' @export perm_pars <- function(v) { n <- length(v) result <- v @@ -409,10 +431,7 @@ perm_pars <- function(v) { } #' Color pallet ggplot-like -#' -#' @param void internal function to be documented #' @keywords internal -#' @export #' @importFrom grDevices hcl col2rgb hsv rgb2hsv gg_color_hue <- function(n) { x <- rgb2hsv(col2rgb("steelblue"))[, 1] @@ -422,25 +441,40 @@ gg_color_hue <- function(n) { return(hsv(cols, x[2], x[3])) } -#' MAPpoly pallet 1 +#' MAPpoly Color Palettes #' -#' @param void internal function to be documented -#' @keywords internal -#' @export -mp_pallet1 <- colorRampPalette(c("#ffe119", "#f58231","#e6194b","#808000","#9a6324", "#800000")) - -#' MAPpoly pallet 2 +#' Provides a set of color palettes designed for use with MAPpoly, +#' a package for genetic mapping in polyploids. These palettes are +#' intended to enhance the visual representation of genetic data. #' -#' @param void internal function to be documented -#' @keywords internal -#' @export -mp_pallet2 <- colorRampPalette(c("#911eb4", "#000075","#4363d8","#42d4f4","#469990", "#3cb44b")) - -#' MAPpoly pallet 3 +#' The available palettes are: +#' \describe{ +#' \item{\code{mp_pallet1}}{A palette with warm colors ranging from yellow to dark red and brown.} +#' \item{\code{mp_pallet2}}{A palette with cool colors, including purples, blues, and green.} +#' \item{\code{mp_pallet3}}{A comprehensive palette that combines colors from both \code{mp_pallet1} and \code{mp_pallet2}, offering a broad range of colors.} +#'} +#' +#' Each palette function returns a function that can generate color vectors of variable length, suitable for mapping or plotting functions in R. #' -#' @param void internal function to be documented +#' @name mappoly-color-palettes +#' @aliases mp_pallet1 mp_pallet2 mp_pallet3 #' @keywords internal -#' @export +#' @export mp_pallet1 mp_pallet2 mp_pallet3 +#' @examples +#' # Generate a palette of 5 colors from mp_pallet1 +#' pal1 <- mp_pallet1(5) +#' plot(1:5, pch=19, col=pal1) +#' +#' # Generate a palette of 10 colors from mp_pallet2 +#' pal2 <- mp_pallet2(10) +#' plot(1:10, pch=19, col=pal2) +#' +#' # Generate a palette of 15 colors from mp_pallet3 +#' pal3 <- mp_pallet3(15) +#' plot(1:15, pch=19, col=pal3) + +mp_pallet1 <- colorRampPalette(c("#ffe119", "#f58231","#e6194b","#808000","#9a6324", "#800000")) +mp_pallet2 <- colorRampPalette(c("#911eb4", "#000075","#4363d8","#42d4f4","#469990", "#3cb44b")) mp_pallet3 <- colorRampPalette(c("#ffe119", "#f58231","#e6194b","#808000","#9a6324", "#800000","#911eb4", "#000075","#4363d8","#42d4f4","#469990", "#3cb44b")) #' Update missing information @@ -475,8 +509,6 @@ update_missing <- function(input.data, prob.thres = 0.95){ } #' Chi-square test -#' -#' @param void internal function to be documented #' @keywords internal mrk_chisq_test <- function(x, ploidy){ y <- x[-c(1:(ploidy+1))] @@ -591,8 +623,6 @@ check_data_sanity <- function(x){ } #' Checks the consistency of dataset (dosage) -#' -#' @param void internal function to be documented #' @keywords internal check_data_dose_sanity <- function(x){ test <- logical(24L) @@ -644,8 +674,6 @@ check_data_dose_sanity <- function(x){ } #' Checks the consistency of dataset (probability distribution) -#' -#' @param void internal function to be documented #' @keywords internal check_data_dist_sanity <- function(x){ test <- logical(29L) @@ -1051,7 +1079,7 @@ update_map = function(input.maps, verbose = TRUE){ } #' Random sampling of dataset -#' @param x an object of class \code{mappoly.data} +#' @param input.data an object of class \code{mappoly.data} #' @param n number of individuals or markers to be sampled #' @param percentage if \code{n == NULL}, the percentage of individuals or markers to be sampled #' @param type should sample individuals or markers? @@ -1143,10 +1171,7 @@ sample_data <- function(input.data, n = NULL, #' Get weighted ordinary least squared map give a sequence and rf matrix -#' -#' @param void internal function to be documented #' @keywords internal -#' @export #' @importFrom zoo na.approx get_ols_map <- function(input.seq, input.mat, weight = TRUE){ id <- input.seq$seq.mrk.names @@ -1175,9 +1200,22 @@ get_ols_map <- function(input.seq, input.mat, weight = TRUE){ d } -#' Get dosage type in a sequence +#' Get Dosage Type in a Sequence #' -#' @param void internal function to be documented +#' Analyzes a genomic sequence object to categorize markers based on their dosage type. +#' The function calculates the dosage type by comparing the dosage of two parental sequences +#' (p1 and p2) against the ploidy level. It categorizes markers into simplex for parent 1 (simplex.p), +#' simplex for parent 2 (simplex.q), double simplex (ds), and multiplex based on the calculated dosages. +#' +#' @param input.seq An object of class \code{"mappoly.sequence"}: +#' +#' @return A list with four components categorizing marker names into: +#' \describe{ +#' \item{simplex.p}{Markers with a simplex dosage from parent 1.} +#' \item{simplex.q}{Markers with a simplex dosage from parent 2.} +#' \item{double.simplex}{Markers with a double simplex dosage.} +#' \item{multiplex}{Markers not fitting into the above categories, indicating a multiplex dosage.} +#' } #' @keywords internal #' @export get_dosage_type <- function(input.seq){ @@ -1193,10 +1231,7 @@ get_dosage_type <- function(input.seq){ } #' Aggregate matrix cells (lower the resolution by a factor) -#' -#' @param void internal function to be documented #' @keywords internal -#' @export aggregate_matrix <- function(M, fact){ id <- seq(1,ncol(M), by = fact) id <- cbind(id, c(id[-1]-1, ncol(M))) @@ -1210,10 +1245,7 @@ aggregate_matrix <- function(M, fact){ } #' Get states and emission in one informative parent -#' -#' @param void internal function to be documented #' @keywords internal -#' @export get_states_and_emission_single_parent <- function(ploidy, ph, global.err, D, dose.notinf.P){ n.mrk <- nrow(D) n.ind <- ncol(D) @@ -1262,10 +1294,7 @@ get_states_and_emission_single_parent <- function(ploidy, ph, global.err, D, dos #' Conversion: vector to matrix -#' -#' @param void internal function to be documented #' @keywords internal -#' @export v_2_m <- function(x, n){ y <- base::matrix(NA, n, n) y[base::lower.tri(y)] <- as.numeric(x) @@ -1337,7 +1366,7 @@ detect_info_par<-function(x){ # .Call("rec_number", as.integer(m), as.numeric(rf), PACKAGE = "mappoly") # -#' @export + .mappoly_data_skeleton<-function(ploidy =NULL, n.ind =NULL, n.mrk =NULL, @@ -1378,7 +1407,7 @@ detect_info_par<-function(x){ elim.correspondence = elim.correspondence), class = "mappoly.data") -#' @export + .mappoly_map_skeleton<-function(ploidy=NULL, n.mrk=NULL, seq.num=NULL, @@ -1416,10 +1445,7 @@ detect_info_par<-function(x){ #' Split map into sub maps given a gap threshold -#' -#' @param void internal function to be documented #' @keywords internal -#' @export split_mappoly <- function(input.map, gap.threshold = 5, size.rem.cluster = 1, diff --git a/man/add_mrk_at_tail_ph_list.Rd b/man/add_mrk_at_tail_ph_list.Rd index 7214d983..7ab19eb2 100644 --- a/man/add_mrk_at_tail_ph_list.Rd +++ b/man/add_mrk_at_tail_ph_list.Rd @@ -6,9 +6,6 @@ \usage{ add_mrk_at_tail_ph_list(ph.list.1, ph.list.2, cor.index) } -\arguments{ -\item{void}{internal function} -} \description{ add a single marker at the tail of a linkage phase list } diff --git a/man/aggregate_matrix.Rd b/man/aggregate_matrix.Rd index fb597d50..be945216 100644 --- a/man/aggregate_matrix.Rd +++ b/man/aggregate_matrix.Rd @@ -6,9 +6,6 @@ \usage{ aggregate_matrix(M, fact) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Aggregate matrix cells (lower the resolution by a factor) } diff --git a/man/calc_genoprob_haplo.Rd b/man/calc_genoprob_haplo.Rd index eb6e9e0d..fa3e5198 100644 --- a/man/calc_genoprob_haplo.Rd +++ b/man/calc_genoprob_haplo.Rd @@ -17,9 +17,6 @@ calc_genoprob_haplo( highprec = FALSE ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Compute conditional probabilities of the genotypes given a sequence of block markers diff --git a/man/cat_phase.Rd b/man/cat_phase.Rd index 74d47ef9..4f9fdd63 100644 --- a/man/cat_phase.Rd +++ b/man/cat_phase.Rd @@ -14,9 +14,6 @@ cat_phase( hmm.phase.number ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ cat for phase information } diff --git a/man/check_data_dist_sanity.Rd b/man/check_data_dist_sanity.Rd index 9901db86..641cb643 100644 --- a/man/check_data_dist_sanity.Rd +++ b/man/check_data_dist_sanity.Rd @@ -6,9 +6,6 @@ \usage{ check_data_dist_sanity(x) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Checks the consistency of dataset (probability distribution) } diff --git a/man/check_data_dose_sanity.Rd b/man/check_data_dose_sanity.Rd index df3ff206..0d1b96a3 100644 --- a/man/check_data_dose_sanity.Rd +++ b/man/check_data_dose_sanity.Rd @@ -6,9 +6,6 @@ \usage{ check_data_dose_sanity(x) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Checks the consistency of dataset (dosage) } diff --git a/man/check_if_rf_is_possible.Rd b/man/check_if_rf_is_possible.Rd index 5c3e7613..46f64998 100644 --- a/man/check_if_rf_is_possible.Rd +++ b/man/check_if_rf_is_possible.Rd @@ -8,9 +8,6 @@ estimation} \usage{ check_if_rf_is_possible(input.seq) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Check if it is possible to estimate the recombination fraction between neighbor markers using two-point diff --git a/man/check_ls_phase.Rd b/man/check_ls_phase.Rd index 0b9b9fc5..56c5f4bd 100644 --- a/man/check_ls_phase.Rd +++ b/man/check_ls_phase.Rd @@ -7,9 +7,6 @@ markers for which they are different.} \usage{ check_ls_phase(ph) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Compare a list of linkage phases and return the markers for which they are different. diff --git a/man/concatenate_ph_list.Rd b/man/concatenate_ph_list.Rd index ed38d21a..40d9eb1b 100644 --- a/man/concatenate_ph_list.Rd +++ b/man/concatenate_ph_list.Rd @@ -6,9 +6,6 @@ \usage{ concatenate_ph_list(ph.list.1, ph.list.2) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ concatenate two linkage phase lists } diff --git a/man/create_map.Rd b/man/create_map.Rd index 853a3a25..b5976b82 100644 --- a/man/create_map.Rd +++ b/man/create_map.Rd @@ -6,9 +6,6 @@ \usage{ create_map(input.map, step = 0, phase.config = "best") } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Create a map with pseudomarkers at a given step } diff --git a/man/draw_cross.Rd b/man/draw_cross.Rd index ad176a52..ced3c7ed 100644 --- a/man/draw_cross.Rd +++ b/man/draw_cross.Rd @@ -14,9 +14,6 @@ draw_cross( height = 6 ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ This function draws the parental map (including the linkage phase configuration) in a pdf output. This function is not to diff --git a/man/est_haplo_hmm.Rd b/man/est_haplo_hmm.Rd index 3091a4f2..d6642a42 100644 --- a/man/est_haplo_hmm.Rd +++ b/man/est_haplo_hmm.Rd @@ -17,9 +17,6 @@ est_haplo_hmm( tol = 0.001 ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Estimate a genetic map given a sequence of block markers } diff --git a/man/est_map_haplo_given_genoprob.Rd b/man/est_map_haplo_given_genoprob.Rd index 35c6453b..ac5f5b85 100644 --- a/man/est_map_haplo_given_genoprob.Rd +++ b/man/est_map_haplo_given_genoprob.Rd @@ -7,9 +7,6 @@ given the conditional probabilities of the genotypes} \usage{ est_map_haplo_given_genoprob(map.list, genoprob.list, tol = 1e-04) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Estimate a genetic map given a sequence of block markers given the conditional probabilities of the genotypes diff --git a/man/est_pairwise_rf.Rd b/man/est_pairwise_rf.Rd index 94c90024..1388779c 100644 --- a/man/est_pairwise_rf.Rd +++ b/man/est_pairwise_rf.Rd @@ -57,25 +57,15 @@ be either PSOCK (default) or FORK.} (for internal use)} } \value{ -An object of class \code{mappoly.twopt} which - is a list containing the following components: - \item{data.name}{name of the object of class - \code{mappoly.data} with the raw data} - \item{n.mrk}{number of markers in the sequence} - \item{seq.num}{a \code{vector} containing the (ordered) indices - of markers in the sequence, according to the input file} - \item{pairwise}{a list of size - \code{choose(length(input.seq$seq.num), 2)}, each of them containing a - matrix where the name of the rows have the form x-y, where x and y indicate - how many homologues share the same allelic variant in parents P and Q, - respectively (see Mollinari and Garcia, 2019 for notation). The first - column indicates the LOD Score in relation to the most likely linkage - phase configuration. The second column shows the estimated recombination - fraction for each configuration, and the third indicates the LOD Score - comparing the likelihood under no linkage (r = 0.5) with the estimated - recombination fraction (evidence of linkage).} - \code{chisq.pval.thres}{threshold used to perform the segregation tests} - \code{chisq.pval}{p-values associated with the performed segregation tests} +An object of class \code{mappoly.twopt} which is a list containing the following components: +\describe{ + \item{\code{data.name}}{Name of the object of class \code{mappoly.data} containing the raw data.} + \item{\code{n.mrk}}{Number of markers in the sequence.} + \item{\code{seq.num}}{A \code{vector} containing the (ordered) indices of markers in the sequence, according to the input file.} + \item{\code{pairwise}}{A list of size \code{choose(length(input.seq$seq.num), 2)}, where each element is a matrix. The rows are named in the format x-y, where x and y indicate how many homologues share the same allelic variant in parents P and Q, respectively (see Mollinari and Garcia, 2019 for notation). The first column indicates the LOD Score for the most likely linkage phase configuration. The second column shows the estimated recombination fraction for each configuration, and the third column indicates the LOD Score for comparing the likelihood under no linkage (r = 0.5) with the estimated recombination fraction (evidence of linkage).} + \item{\code{chisq.pval.thres}}{Threshold used to perform the segregation tests.} + \item{\code{chisq.pval}}{P-values associated with the performed segregation tests.} +} } \description{ Performs the two-point pairwise analysis between all markers in a sequence. diff --git a/man/est_rf_hmm_single_phase.Rd b/man/est_rf_hmm_single_phase.Rd index 9e5e4adc..f40571c8 100644 --- a/man/est_rf_hmm_single_phase.Rd +++ b/man/est_rf_hmm_single_phase.Rd @@ -15,39 +15,9 @@ est_rf_hmm_single_phase( max.rf.to.break.EM = 0.5 ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Multipoint analysis using Hidden Markov Models (single phase) } -\examples{ - \donttest{ - seq.all.mrk <- make_seq_mappoly(hexafake, 1:20) - id <- get_genomic_order(seq.all.mrk) - s.go <- make_seq_mappoly(id) - ## Using the 5 contiguous markers - seq5 <- make_seq_mappoly(hexafake, s.go$seq.mrk.names[6:10]) - twopt <- est_pairwise_rf(seq5) - l5 <- ls_linkage_phases(input.seq = seq5, thres = 2, twopt = twopt) - plot(l5) - - ## Evaluating 2 linkage phase configurations using HMM - maps1 <- vector("list", length(l5$config.to.test)) - for(i in 1:length(maps1)) - maps1[[i]] <- est_rf_hmm_single_phase(seq5, l5$config.to.test[[i]], - tol = 10e-3, - high.prec = FALSE) - (best <- which.max(sapply(maps1, function(x) x$loglike))) - dist1 <- round(cumsum(c(0, imf_h(maps1[[best]]$seq.rf))),2) - - ## Same thing using automatic search - maps2 <- est_rf_hmm(input.seq = seq5, twopt = twopt, thres = 2, - verbose = TRUE, tol = 10e-3, high.prec = FALSE) - plot(maps2) - dist1 - } -} \author{ Marcelo Mollinari, \email{mmollin@ncsu.edu} } diff --git a/man/est_rf_hmm_single_phase_single_parent.Rd b/man/est_rf_hmm_single_phase_single_parent.Rd index 9ca73e6e..40a02c1a 100644 --- a/man/est_rf_hmm_single_phase_single_parent.Rd +++ b/man/est_rf_hmm_single_phase_single_parent.Rd @@ -16,13 +16,7 @@ est_rf_hmm_single_phase_single_parent( ret.map.no.rf.estimation = FALSE ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Multilocus analysis using Hidden Markov Models (single parent, single phase) } -\author{ -Marcelo Mollinari, \email{mmollin@ncsu.edu} -} \keyword{internal} diff --git a/man/filter_map_at_hmm_thres.Rd b/man/filter_map_at_hmm_thres.Rd index b2f8c42d..1a30aa3b 100644 --- a/man/filter_map_at_hmm_thres.Rd +++ b/man/filter_map_at_hmm_thres.Rd @@ -2,14 +2,22 @@ % Please edit documentation in R/est_map_hmm.R \name{filter_map_at_hmm_thres} \alias{filter_map_at_hmm_thres} -\title{remove maps under a certain threshold} +\title{Filter MAPpoly Map Configurations by Loglikelihood Threshold} \usage{ filter_map_at_hmm_thres(map, thres.hmm) } \arguments{ -\item{void}{internal function to be documented} +\item{map}{An object of class `"mappoly.map"`, which may contain several maps +with different linkage phase configurations and their respective log-likelihoods.} + +\item{thres.hmm}{The threshold for filtering configurations.} +} +\value{ +Returns the modified `"mappoly.map"` object with configurations filtered +based on the log-likelihood threshold. } \description{ -remove maps under a certain threshold +This function filters configurations within a `"mappoly.map"` object based on a specified +log-likelihood threshold. } \keyword{internal} diff --git a/man/filter_missing.Rd b/man/filter_missing.Rd index 1e9bbefb..32282360 100644 --- a/man/filter_missing.Rd +++ b/man/filter_missing.Rd @@ -12,21 +12,23 @@ filter_missing( ) } \arguments{ -\item{input.data}{an object of class \code{mappoly.data}} +\item{input.data}{an object of class \code{mappoly.data}.} \item{type}{one of the following options: -\code{'marker'}{filter out markers based on their percentage of missing data (default)} -\code{'individual'}{filter out individuals based on their percentage of missing data} +\enumerate{ + \item \code{"marker"}: filter out markers based on their percentage of missing data (default). + \item \code{"individual"}: filter out individuals based on their percentage of missing data. +} Please notice that removing individuals with certain amount of data can change some marker parameters (such as depth), and can also change the estimated genotypes for other individuals. -So be careful when removing individuals.} +So, be careful when removing individuals.} -\item{filter.thres}{maximum percentage of missing data (default = 0.2)} +\item{filter.thres}{maximum percentage of missing data (default = 0.2).} -\item{inter}{if \code{TRUE}, expects user-input to proceed with filtering} +\item{inter}{if \code{TRUE}, expects user-input to proceed with filtering.} } \description{ -Excludes markers or individuals based on their proportion of missing data +Excludes markers or individuals based on their proportion of missing data. } \examples{ plot(tetra.solcap) @@ -35,7 +37,8 @@ dat.filt.mrk <- filter_missing(input.data = tetra.solcap, filter.thres = 0.1, inter = TRUE) plot(dat.filt.mrk) + } \author{ -Marcelo Mollinari, \email{mmollin@ncsu.edu} +Marcelo Mollinari, \email{mmollin@ncsu.edu}. } diff --git a/man/filter_non_conforming_classes.Rd b/man/filter_non_conforming_classes.Rd index b586dbb9..d86b33a0 100644 --- a/man/filter_non_conforming_classes.Rd +++ b/man/filter_non_conforming_classes.Rd @@ -6,9 +6,6 @@ \usage{ filter_non_conforming_classes(input.data, prob.thres = NULL) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Filter non-conforming classes in F1, non double reduced population. } diff --git a/man/format_rf.Rd b/man/format_rf.Rd index cc0f4b9d..ec292a37 100644 --- a/man/format_rf.Rd +++ b/man/format_rf.Rd @@ -6,9 +6,6 @@ \usage{ format_rf(res) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Format results from pairwise two-point estimation in C++ } diff --git a/man/genetic-mapping-functions.Rd b/man/genetic-mapping-functions.Rd new file mode 100644 index 00000000..35ff8493 --- /dev/null +++ b/man/genetic-mapping-functions.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{genetic-mapping-functions} +\alias{genetic-mapping-functions} +\alias{mf_k} +\alias{mf_h} +\alias{mf_m} +\alias{imf_k} +\alias{imf_h} +\alias{imf_m} +\title{Genetic Mapping Functions} +\usage{ +mf_k(d) + +mf_h(d) + +mf_m(d) + +imf_k(r) + +imf_h(r) + +imf_m(r) +} +\arguments{ +\item{d}{Numeric or numeric vector, representing genetic distances in centiMorgans (cM) for direct functions (mf_k, mf_h, mf_m).} + +\item{r}{Numeric or numeric vector, representing recombination fractions for inverse functions (imf_k, imf_h, imf_m).} +} +\description{ +These functions facilitate the conversion between recombination fractions (r) and genetic distances (d) +using various mapping models. The functions starting with `mf_` convert recombination fractions to genetic distances, +while those starting with `imf_` convert genetic distances back into recombination fractions. +} +\details{ +The `mf_` prefixed functions apply different models to convert recombination fractions into genetic distances: +\itemize{ + \item \code{mf_k}: Kosambi mapping function. + \item \code{mf_h}: Haldane mapping function. + \item \code{mf_m}: Morgan mapping function. +} +The `imf_` prefixed functions convert genetic distances back into recombination fractions: +\itemize{ + \item \code{imf_k}: Inverse Kosambi mapping function. + \item \code{imf_h}: Inverse Haldane mapping function. + \item \code{imf_m}: Inverse Morgan mapping function. +} +} +\references{ +Kosambi, D.D. (1944). The estimation of map distances from recombination values. Ann Eugen., 12, 172-175. +Haldane, J.B.S. (1919). The combination of linkage values, and the calculation of distances between the loci of linked factors. J Genet, 8, 299-309. +Morgan, T.H. (1911). Random segregation versus coupling in Mendelian inheritance. Science, 34(873), 384. +} +\keyword{genetics} diff --git a/man/genotyping_global_error.Rd b/man/genotyping_global_error.Rd index 79255223..35f65aa0 100644 --- a/man/genotyping_global_error.Rd +++ b/man/genotyping_global_error.Rd @@ -12,9 +12,6 @@ genotyping_global_error( th.prob = 0.95 ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ If \code{restricted = TRUE}, it restricts the prior to the possible classes under Mendelian, non double-reduced segregation diff --git a/man/get_cache_two_pts_from_web.Rd b/man/get_cache_two_pts_from_web.Rd index 5b874e92..86ab1a18 100644 --- a/man/get_cache_two_pts_from_web.Rd +++ b/man/get_cache_two_pts_from_web.Rd @@ -11,9 +11,6 @@ get_cache_two_pts_from_web( verbose = FALSE ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Access a remote server to get Counts for recombinant classes } diff --git a/man/get_counts.Rd b/man/get_counts.Rd index 8e7bfa1b..725fb2ab 100644 --- a/man/get_counts.Rd +++ b/man/get_counts.Rd @@ -15,9 +15,6 @@ get_counts( joint.prob = FALSE ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Counts for recombinant classes } diff --git a/man/get_counts_all_phases.Rd b/man/get_counts_all_phases.Rd index ded43f1d..754aaa23 100644 --- a/man/get_counts_all_phases.Rd +++ b/man/get_counts_all_phases.Rd @@ -12,9 +12,6 @@ get_counts_all_phases( joint.prob = FALSE ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ return the counts of each recombinant class (for two loci) in polyploid cross. The results of this function contains several diff --git a/man/get_counts_two_parents.Rd b/man/get_counts_two_parents.Rd index 4540768b..df1609b4 100644 --- a/man/get_counts_two_parents.Rd +++ b/man/get_counts_two_parents.Rd @@ -15,9 +15,6 @@ get_counts_two_parents( joint.prob = FALSE ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Counts for recombinant classes } diff --git a/man/get_dosage_type.Rd b/man/get_dosage_type.Rd index f6e8dfe2..17bbceaf 100644 --- a/man/get_dosage_type.Rd +++ b/man/get_dosage_type.Rd @@ -2,14 +2,26 @@ % Please edit documentation in R/utils.R \name{get_dosage_type} \alias{get_dosage_type} -\title{Get dosage type in a sequence} +\title{Get Dosage Type in a Sequence} \usage{ get_dosage_type(input.seq) } \arguments{ -\item{void}{internal function to be documented} +\item{input.seq}{An object of class \code{"mappoly.sequence"}:} +} +\value{ +A list with four components categorizing marker names into: +\describe{ + \item{simplex.p}{Markers with a simplex dosage from parent 1.} + \item{simplex.q}{Markers with a simplex dosage from parent 2.} + \item{double.simplex}{Markers with a double simplex dosage.} + \item{multiplex}{Markers not fitting into the above categories, indicating a multiplex dosage.} +} } \description{ -Get dosage type in a sequence +Analyzes a genomic sequence object to categorize markers based on their dosage type. +The function calculates the dosage type by comparing the dosage of two parental sequences +(p1 and p2) against the ploidy level. It categorizes markers into simplex for parent 1 (simplex.p), +simplex for parent 2 (simplex.q), double simplex (ds), and multiplex based on the calculated dosages. } \keyword{internal} diff --git a/man/get_full_info_tail.Rd b/man/get_full_info_tail.Rd index 9dacdd7b..352b266d 100644 --- a/man/get_full_info_tail.Rd +++ b/man/get_full_info_tail.Rd @@ -7,9 +7,6 @@ provide no additional information.} \usage{ get_full_info_tail(input.obj, extend = NULL) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Get the tail of a marker sequence up to the point where the markers provide no additional information. diff --git a/man/get_ols_map.Rd b/man/get_ols_map.Rd index cc083274..8e34e2bf 100644 --- a/man/get_ols_map.Rd +++ b/man/get_ols_map.Rd @@ -6,9 +6,6 @@ \usage{ get_ols_map(input.seq, input.mat, weight = TRUE) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Get weighted ordinary least squared map give a sequence and rf matrix } diff --git a/man/get_ph_list_subset.Rd b/man/get_ph_list_subset.Rd index cc7b746b..e096ef71 100644 --- a/man/get_ph_list_subset.Rd +++ b/man/get_ph_list_subset.Rd @@ -6,9 +6,6 @@ \usage{ get_ph_list_subset(ph.list, seq.num, conf) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ subset of a linkage phase list } diff --git a/man/get_rf_from_mat.Rd b/man/get_rf_from_mat.Rd index 9deac002..d6bbdacd 100644 --- a/man/get_rf_from_mat.Rd +++ b/man/get_rf_from_mat.Rd @@ -6,9 +6,6 @@ \usage{ get_rf_from_mat(M) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Get recombination fraction from a matrix } diff --git a/man/get_states_and_emission_single_parent.Rd b/man/get_states_and_emission_single_parent.Rd index 40fa36d4..7e6ed44d 100644 --- a/man/get_states_and_emission_single_parent.Rd +++ b/man/get_states_and_emission_single_parent.Rd @@ -6,9 +6,6 @@ \usage{ get_states_and_emission_single_parent(ploidy, ph, global.err, D, dose.notinf.P) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Get states and emission in one informative parent } diff --git a/man/get_w_m.Rd b/man/get_w_m.Rd index ed547e01..9ae827ff 100644 --- a/man/get_w_m.Rd +++ b/man/get_w_m.Rd @@ -6,9 +6,6 @@ \usage{ get_w_m(ploidy) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Get the number of bivalent configurations } diff --git a/man/gg_color_hue.Rd b/man/gg_color_hue.Rd index e533dbdd..dc254875 100644 --- a/man/gg_color_hue.Rd +++ b/man/gg_color_hue.Rd @@ -6,9 +6,6 @@ \usage{ gg_color_hue(n) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Color pallet ggplot-like } diff --git a/man/imf_h.Rd b/man/imf_h.Rd deleted file mode 100644 index afdf972a..00000000 --- a/man/imf_h.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{imf_h} -\alias{imf_h} -\title{Map functions} -\usage{ -imf_h(r) -} -\arguments{ -\item{void}{internal function to be documented} -} -\description{ -Map functions -} -\keyword{internal} diff --git a/man/imf_k.Rd b/man/imf_k.Rd deleted file mode 100644 index 587e8aba..00000000 --- a/man/imf_k.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{imf_k} -\alias{imf_k} -\title{Map functions} -\usage{ -imf_k(r) -} -\arguments{ -\item{void}{internal function to be documented} -} -\description{ -Map functions -} -\keyword{internal} diff --git a/man/imf_m.Rd b/man/imf_m.Rd deleted file mode 100644 index 51b75447..00000000 --- a/man/imf_m.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{imf_m} -\alias{imf_m} -\title{Map functions} -\usage{ -imf_m(r) -} -\arguments{ -\item{void}{internal function to be documented} -} -\description{ -Map functions -} -\keyword{internal} diff --git a/man/is.prob.data.Rd b/man/is.prob.data.Rd index 7dff080c..f5eda8e6 100644 --- a/man/is.prob.data.Rd +++ b/man/is.prob.data.Rd @@ -2,14 +2,21 @@ % Please edit documentation in R/utils.R \name{is.prob.data} \alias{is.prob.data} -\title{Is it a probability dataset?} +\title{Check if Object is a Probability Dataset in MAPpoly} \usage{ is.prob.data(x) } \arguments{ -\item{void}{internal function to be documented} +\item{x}{An object of class `"mappoly.data"`} +} +\value{ +A logical value: `TRUE` if the 'geno' component exists within `x`, +indicating it is a valid probability dataset for genetic analysis; `FALSE` +otherwise. } \description{ -Is it a probability dataset? +Determines whether the specified object is a probability dataset +by checking for the existence of the 'geno' component within a +`"mappoly.data"` object. } \keyword{internal} diff --git a/man/make_seq_mappoly.Rd b/man/make_seq_mappoly.Rd index 81c2af35..090bebe2 100644 --- a/man/make_seq_mappoly.Rd +++ b/man/make_seq_mappoly.Rd @@ -4,7 +4,7 @@ \alias{make_seq_mappoly} \alias{print.mappoly.sequence} \alias{plot.mappoly.sequence} -\title{Create a sequence of markers} +\title{Create a Sequence of Markers} \usage{ make_seq_mappoly( input.obj, @@ -19,81 +19,61 @@ make_seq_mappoly( \method{plot}{mappoly.sequence}(x, ...) } \arguments{ -\item{input.obj}{an object of one of the following classes: -\code{mappoly.data}, \code{mappoly.map}, \code{mappoly.sequence}, -\code{mappoly.group}, \code{mappoly.unique.seq}, -\code{mappoly.pcmap}, \code{mappoly.pcmap3d}, \code{mappoly.geno.ord} or -\code{mappoly.edit.order}} +\item{input.obj}{An object belonging to one of the specified classes: \code{mappoly.data}, +\code{mappoly.map}, \code{mappoly.sequence}, \code{mappoly.group}, \code{mappoly.unique.seq}, +\code{mappoly.pcmap}, \code{mappoly.pcmap3d}, \code{mappoly.geno.ord}, or \code{mappoly.edit.order}.} -\item{arg}{can be one of the following objects: i) a string 'all', -resulting in a sequence with all markers in the raw data; ii) a -string or a vector of strings \code{'seqx'}, where \code{x} -is the sequence (\code{x = 0} indicates unassigned markers); iii) a -\code{vector} of integers specifying which markers comprise the -sequence; iv) a \code{vector} of integers representing linkage group if -\code{input.object} has class \code{mappoly.group}; or v) NULL if -\code{input.object} has class \code{mappoly.pcmap}, \code{mappoly.pcmap3d}, -\code{mappoly.unique.seq}, or \code{mappoly.geno.ord}} +\item{arg}{Specifies the markers to include in the sequence, accepting several formats: a string 'all' for all +markers; a string or vector of strings 'seqx' where x is the sequence number (0 for unassigned markers); a +vector of integers indicating specific markers; or a vector of integers representing linkage group numbers if +\code{input.obj} is of class \code{mappoly.group}. For certain classes (\code{mappoly.pcmap}, \code{mappoly.pcmap3d}, +\code{mappoly.unique.seq}, or \code{mappoly.geno.ord}), \code{arg} can be \code{NULL}.} -\item{data.name}{name of the object of class \code{mappoly.data}} +\item{data.name}{Name of the \code{mappoly.data} class object.} -\item{info.parent}{one of the following options: -\code{'all'}{select all dosage combinations in both parents (default)} -\code{'P1'}{select informative markers parent 1} -\code{'P2'}{select informative markers parent 2}} +\item{info.parent}{Selection criteria based on parental information: \code{'all'} for all dosage combinations, +\code{'P1'} for markers informative in parent 1, or \code{'P2'} for markers informative in parent 2. Default +is \code{'all'}.} -\item{genomic.info}{optional argument applied to \code{mappoly.group} objects only. This argument can be \code{NULL}, -or can hold the numeric combination of sequences from genomic information to be used when making the sequences. -When \code{genomic.info = NULL} (default), the function returns a sequence containing all markers defined -by the grouping function. When \code{genomic.info = 1}, the function returns a sequence with markers -that matched the intersection between grouping function and genomic information, considering the sequence -from genomic information that holds the maximum number of markers matching the group; -when \code{genomic.info = c(1,2)}, the function returns a sequence with markers -that matched the intersection between grouping function and genomic information, considering two sequences -from genomic information that presented the maximum number of markers matching the group; and so on.} +\item{genomic.info}{Optional and applicable only to \code{mappoly.group} objects. Specifies the use of genomic +information in sequence creation. With \code{NULL} (default), all markers defined by the grouping function are +included. Numeric values indicate the use of specific sequences from genomic information, aiming to match the +maximum number of markers with the group. Supports single values or vectors for multiple sequence consideration.} -\item{x}{an object of the class \code{mappoly.sequence}} +\item{x}{An object of class \code{mappoly.sequence}.} -\item{...}{currently ignored} +\item{...}{Currently ignored.} } \value{ -An object of class \code{mappoly.sequence}, which is a - list containing the following components: - \item{seq.num}{a \code{vector} containing the (ordered) indices - of markers in the sequence, according to the input file} - \item{seq.phases}{a \code{list} with the linkage phases between - markers in the sequence, in corresponding positions. \code{-1} - means that there are no defined linkage phases} - \item{seq.rf}{a \code{vector} with the recombination - frequencies between markers in the sequence. \code{-1} means - that there are no estimated recombination frequencies} - \item{loglike}{log-likelihood of the corresponding linkage - map} - \item{data.name}{name of the object of class - \code{mappoly.data} with the raw data} - \item{twopt}{name of the object of class \code{mappoly.twopt} - with the 2-point analyses. \code{-1} means that the twopt - estimates were not computed} +Returns an object of class \code{mappoly.sequence}, comprising: +\itemize{ + \item{\code{seq.num}}{Ordered vector of marker indices according to the input.} + \item{\code{seq.phases}}{List of linkage phases between markers; \code{-1} for undefined phases.} + \item{\code{seq.rf}}{Vector of recombination frequencies; \code{-1} for unestimated frequencies.} + \item{\code{loglike}}{Log-likelihood of the linkage map.} + \item{\code{data.name}}{Name of the \code{mappoly.data} object with raw data.} + \item{\code{twopt}}{Name of the \code{mappoly.twopt} object with 2-point analyses; \code{-1} if not computed.} +} } \description{ -Makes a sequence of markers based on an object of another class. +Constructs a sequence of markers based on an object belonging to various specified classes. This +function is versatile, supporting multiple input types and configurations for generating marker sequences. } \examples{ - all.mrk <- make_seq_mappoly(hexafake, 'all') - seq1.mrk <- make_seq_mappoly(hexafake, 'seq1') - plot(seq1.mrk) - some.mrk.pos <- c(1,4,28,32,45) - (some.mrk.1 <- make_seq_mappoly(hexafake, some.mrk.pos)) - plot(some.mrk.1) +all.mrk <- make_seq_mappoly(hexafake, 'all') +seq1.mrk <- make_seq_mappoly(hexafake, 'seq1') +plot(seq1.mrk) +some.mrk.pos <- c(1,4,28,32,45) +some.mrk.1 <- make_seq_mappoly(hexafake, some.mrk.pos) +plot(some.mrk.1) } \references{ -Mollinari, M., and Garcia, A. A. F. (2019) Linkage - analysis and haplotype phasing in experimental autopolyploid - populations with high ploidy level using hidden Markov - models, _G3: Genes, Genomes, Genetics_. - \doi{10.1534/g3.119.400378} +Mollinari, M., and Garcia, A. A. F. (2019). Linkage analysis and haplotype phasing in experimental +autopolyploid populations with high ploidy level using hidden Markov models. _G3: Genes|Genomes|Genetics_, +\doi{10.1534/g3.119.400378}. } \author{ -Marcelo Mollinari, \email{mmollin@ncsu.edu}, with modifications by Gabriel Gesteira, \email{gdesiqu@ncsu.edu} +Marcelo Mollinari \email{mmollin@ncsu.edu}, with modifications by Gabriel Gesteira +\email{gdesiqu@ncsu.edu} } diff --git a/man/mappoly-color-palettes.Rd b/man/mappoly-color-palettes.Rd new file mode 100644 index 00000000..359a21a1 --- /dev/null +++ b/man/mappoly-color-palettes.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{mappoly-color-palettes} +\alias{mappoly-color-palettes} +\alias{mp_pallet1} +\alias{mp_pallet2} +\alias{mp_pallet3} +\title{MAPpoly Color Palettes} +\usage{ +mp_pallet1(n) +} +\description{ +Provides a set of color palettes designed for use with MAPpoly, +a package for genetic mapping in polyploids. These palettes are +intended to enhance the visual representation of genetic data. +} +\details{ +The available palettes are: +\describe{ + \item{\code{mp_pallet1}}{A palette with warm colors ranging from yellow to dark red and brown.} + \item{\code{mp_pallet2}}{A palette with cool colors, including purples, blues, and green.} + \item{\code{mp_pallet3}}{A comprehensive palette that combines colors from both \code{mp_pallet1} and \code{mp_pallet2}, offering a broad range of colors.} +} + +Each palette function returns a function that can generate color vectors of variable length, suitable for mapping or plotting functions in R. +} +\examples{ +# Generate a palette of 5 colors from mp_pallet1 +pal1 <- mp_pallet1(5) +plot(1:5, pch=19, col=pal1) + +# Generate a palette of 10 colors from mp_pallet2 +pal2 <- mp_pallet2(10) +plot(1:10, pch=19, col=pal2) + +# Generate a palette of 15 colors from mp_pallet3 +pal3 <- mp_pallet3(15) +plot(1:15, pch=19, col=pal3) +} +\keyword{internal} diff --git a/man/mf_h.Rd b/man/mf_h.Rd deleted file mode 100644 index a9b53668..00000000 --- a/man/mf_h.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{mf_h} -\alias{mf_h} -\title{Map functions} -\usage{ -mf_h(d) -} -\arguments{ -\item{void}{internal function to be documented} -} -\description{ -Map functions -} -\keyword{internal} diff --git a/man/mf_k.Rd b/man/mf_k.Rd deleted file mode 100644 index 712073bc..00000000 --- a/man/mf_k.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{mf_k} -\alias{mf_k} -\title{Map functions} -\usage{ -mf_k(d) -} -\arguments{ -\item{void}{internal function to be documented} -} -\description{ -Map functions -} -\keyword{internal} diff --git a/man/mf_m.Rd b/man/mf_m.Rd deleted file mode 100644 index 379df078..00000000 --- a/man/mf_m.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{mf_m} -\alias{mf_m} -\title{Map functions} -\usage{ -mf_m(d) -} -\arguments{ -\item{void}{internal function to be documented} -} -\description{ -Map functions -} -\keyword{internal} diff --git a/man/mp_pallet1.Rd b/man/mp_pallet1.Rd deleted file mode 100644 index 06f8c0a1..00000000 --- a/man/mp_pallet1.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{mp_pallet1} -\alias{mp_pallet1} -\title{MAPpoly pallet 1} -\usage{ -mp_pallet1(n) -} -\arguments{ -\item{void}{internal function to be documented} -} -\description{ -MAPpoly pallet 1 -} -\keyword{internal} diff --git a/man/mp_pallet2.Rd b/man/mp_pallet2.Rd deleted file mode 100644 index d9501058..00000000 --- a/man/mp_pallet2.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{mp_pallet2} -\alias{mp_pallet2} -\title{MAPpoly pallet 2} -\usage{ -mp_pallet2(n) -} -\arguments{ -\item{void}{internal function to be documented} -} -\description{ -MAPpoly pallet 2 -} -\keyword{internal} diff --git a/man/mp_pallet3.Rd b/man/mp_pallet3.Rd deleted file mode 100644 index dd169b5c..00000000 --- a/man/mp_pallet3.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{mp_pallet3} -\alias{mp_pallet3} -\title{MAPpoly pallet 3} -\usage{ -mp_pallet3(n) -} -\arguments{ -\item{void}{internal function to be documented} -} -\description{ -MAPpoly pallet 3 -} -\keyword{internal} diff --git a/man/mrk_chisq_test.Rd b/man/mrk_chisq_test.Rd index d3d3e2b9..2aa5f9c3 100644 --- a/man/mrk_chisq_test.Rd +++ b/man/mrk_chisq_test.Rd @@ -6,9 +6,6 @@ \usage{ mrk_chisq_test(x, ploidy) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Chi-square test } diff --git a/man/msg.Rd b/man/msg.Rd index ba5e72fc..48b92203 100644 --- a/man/msg.Rd +++ b/man/msg.Rd @@ -6,9 +6,6 @@ \usage{ msg(text, line = 1) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Msg function } diff --git a/man/paralell_pairwise_discrete.Rd b/man/paralell_pairwise_discrete.Rd index e1c17daa..2159f699 100644 --- a/man/paralell_pairwise_discrete.Rd +++ b/man/paralell_pairwise_discrete.Rd @@ -15,9 +15,6 @@ paralell_pairwise_discrete( ll = ll ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Wrapper function to discrete-based pairwise two-point estimation in C++ } diff --git a/man/paralell_pairwise_discrete_rcpp.Rd b/man/paralell_pairwise_discrete_rcpp.Rd index 48c894f1..06951882 100644 --- a/man/paralell_pairwise_discrete_rcpp.Rd +++ b/man/paralell_pairwise_discrete_rcpp.Rd @@ -19,9 +19,6 @@ paralell_pairwise_discrete_rcpp( tol = .Machine$double.eps^0.25 ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Wrapper function to discrete-based pairwise two-point estimation in C++ } diff --git a/man/paralell_pairwise_probability.Rd b/man/paralell_pairwise_probability.Rd index d01f54aa..c46bb72d 100644 --- a/man/paralell_pairwise_probability.Rd +++ b/man/paralell_pairwise_probability.Rd @@ -15,9 +15,6 @@ paralell_pairwise_probability( ll = ll ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Wrapper function to probability-based pairwise two-point estimation in C++ } diff --git a/man/perm_pars.Rd b/man/perm_pars.Rd index 94a8a31b..99151340 100644 --- a/man/perm_pars.Rd +++ b/man/perm_pars.Rd @@ -6,9 +6,6 @@ \usage{ perm_pars(v) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ N!/2 combination } diff --git a/man/perm_tot.Rd b/man/perm_tot.Rd index f954af09..d86084dc 100644 --- a/man/perm_tot.Rd +++ b/man/perm_tot.Rd @@ -6,9 +6,6 @@ \usage{ perm_tot(v) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ N! combination } diff --git a/man/plot_compare_haplotypes.Rd b/man/plot_compare_haplotypes.Rd index 660844b1..101eb9be 100644 --- a/man/plot_compare_haplotypes.Rd +++ b/man/plot_compare_haplotypes.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils.R \name{plot_compare_haplotypes} \alias{plot_compare_haplotypes} -\title{Plot two overlapped haplotypes} +\title{Plot Two Overlapped Haplotypes} \usage{ plot_compare_haplotypes( ploidy, @@ -13,9 +13,32 @@ plot_compare_haplotypes( ) } \arguments{ -\item{void}{internal function to be documented} +\item{ploidy}{Integer, specifying the ploidy level of the organism being represented.} + +\item{hom.allele.p1}{A list where each element represents the alleles for a marker in the first haplotype group, for 'p' parent.} + +\item{hom.allele.q1}{A list where each element represents the alleles for a marker in the first haplotype group, for 'q' parent.} + +\item{hom.allele.p2}{Optionally, a list where each element represents the alleles for a marker in the second haplotype group, for 'p' parent.} + +\item{hom.allele.q2}{Optionally, a list where each element represents the alleles for a marker in the second haplotype group, for 'q' parent.} +} +\value{ +Invisible. The function primarily generates a plot for visual analysis and does not return any data. } \description{ -Plot two overlapped haplotypes +This function plots two sets of haplotypes for comparison, allowing for visual +inspection of homologous allele patterns across two groups or conditions. +It is designed to handle and display genetic data for organisms with varying ploidy levels. +} +\details{ +The function creates a graphical representation of haplotypes, where each marker's alleles are plotted +along a line for each parent ('p' and 'q'). If provided, the second set of haplotypes (for comparison) are overlaid +on the same plot. This allows for direct visual comparison of allele presence or absence across the two sets. +Different colors are used to distinguish between the first and second sets of haplotypes. + +The function uses several internal helper functions (`ph_list_to_matrix` and `ph_matrix_to_list`) to manipulate +haplotype data. These functions should correctly handle the conversion between list and matrix representations +of haplotype data. } \keyword{internal} diff --git a/man/plot_one_map.Rd b/man/plot_one_map.Rd index ae8a35f8..38752d74 100644 --- a/man/plot_one_map.Rd +++ b/man/plot_one_map.Rd @@ -6,9 +6,6 @@ \usage{ plot_one_map(x, i = 0, horiz = FALSE, col = "lightgray") } -\arguments{ -\item{void}{internal function to be documented} -} \description{ plot a single linkage group with no phase } diff --git a/man/poly_hmm_est.Rd b/man/poly_hmm_est.Rd index 0e510af8..6c3a5c45 100644 --- a/man/poly_hmm_est.Rd +++ b/man/poly_hmm_est.Rd @@ -19,9 +19,6 @@ poly_hmm_est( tol = 0.001 ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Estimate genetic map using as input the probability distribution of genotypes (wrapper function to C++) diff --git a/man/prepare_map.Rd b/man/prepare_map.Rd index a77a87bd..5f4e7383 100644 --- a/man/prepare_map.Rd +++ b/man/prepare_map.Rd @@ -6,9 +6,6 @@ \usage{ prepare_map(input.map, config = "best") } -\arguments{ -\item{void}{internal function to be documented} -} \description{ prepare maps for plot } diff --git a/man/print_ph.Rd b/man/print_ph.Rd index c4e7c370..37a62c6c 100644 --- a/man/print_ph.Rd +++ b/man/print_ph.Rd @@ -6,9 +6,6 @@ \usage{ print_ph(input.ph) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ cat for graphical representation of the phases } diff --git a/man/sample_data.Rd b/man/sample_data.Rd index 6ed2410a..639467ab 100644 --- a/man/sample_data.Rd +++ b/man/sample_data.Rd @@ -14,6 +14,8 @@ sample_data( ) } \arguments{ +\item{input.data}{an object of class \code{mappoly.data}} + \item{n}{number of individuals or markers to be sampled} \item{percentage}{if \code{n == NULL}, the percentage of individuals or markers to be sampled} @@ -25,8 +27,6 @@ if \code{type = "individual"}, \code{n = NULL} and \code{percentage = NULL}} \item{selected.mrk}{a vector containing the name of the markers to select. Only has effect if \code{type = "marker"}, \code{n = NULL} and \code{percentage = NULL}} - -\item{x}{an object of class \code{mappoly.data}} } \value{ an object of class \code{mappoly.data} diff --git a/man/select_rf.Rd b/man/select_rf.Rd index dfc590ec..f76881fd 100644 --- a/man/select_rf.Rd +++ b/man/select_rf.Rd @@ -6,9 +6,6 @@ \usage{ select_rf(x, thresh.LOD.ph, thresh.LOD.rf, thresh.rf, shared.alleles = FALSE) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Select rf and lod based on thresholds } diff --git a/man/sim_cross_one_informative_parent.Rd b/man/sim_cross_one_informative_parent.Rd index cd51034f..9ba9b4cf 100644 --- a/man/sim_cross_one_informative_parent.Rd +++ b/man/sim_cross_one_informative_parent.Rd @@ -14,9 +14,6 @@ sim_cross_one_informative_parent( prob = NULL ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ This function simulates a polyploid mapping population under random chromosome segregation diff --git a/man/sim_cross_two_informative_parents.Rd b/man/sim_cross_two_informative_parents.Rd index b66e1d3b..8660c090 100644 --- a/man/sim_cross_two_informative_parents.Rd +++ b/man/sim_cross_two_informative_parents.Rd @@ -16,9 +16,6 @@ sim_cross_two_informative_parents( seed = NULL ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Simulate mapping population (tow parents) } diff --git a/man/split_mappoly.Rd b/man/split_mappoly.Rd index d661620e..156efe05 100644 --- a/man/split_mappoly.Rd +++ b/man/split_mappoly.Rd @@ -13,9 +13,6 @@ split_mappoly( verbose = TRUE ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Split map into sub maps given a gap threshold } diff --git a/man/table_to_mappoly.Rd b/man/table_to_mappoly.Rd index d3ad8206..270f49e4 100644 --- a/man/table_to_mappoly.Rd +++ b/man/table_to_mappoly.Rd @@ -12,9 +12,6 @@ table_to_mappoly( verbose = TRUE ) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Conversion of data.frame to mappoly.data } diff --git a/man/update_ph_list_at_hmm_thres.Rd b/man/update_ph_list_at_hmm_thres.Rd index 8128813a..7fc9a701 100644 --- a/man/update_ph_list_at_hmm_thres.Rd +++ b/man/update_ph_list_at_hmm_thres.Rd @@ -7,9 +7,6 @@ configurations under a certain threshold} \usage{ update_ph_list_at_hmm_thres(map, thres.hmm) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ makes a phase list from map, selecting only configurations under a certain threshold diff --git a/man/v_2_m.Rd b/man/v_2_m.Rd index ffa11ead..9288daff 100644 --- a/man/v_2_m.Rd +++ b/man/v_2_m.Rd @@ -6,9 +6,6 @@ \usage{ v_2_m(x, n) } -\arguments{ -\item{void}{internal function to be documented} -} \description{ Conversion: vector to matrix } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index f10427fb..252d4950 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -59,22 +59,22 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP calc_genoprob(void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP calc_genoprob_prior(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP calc_genprob_haplo(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP calc_genprob_haplo_highprec(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP calc_genprob_single_parent(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_haplotype_map(void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_haplotype_map_highprec(void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_hmm_map_single_parent(void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_map_hmm(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_map_hmm_highprec(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP get_counts_single_parent_cpp(void *, void *, void *, void *, void *, void *); -RcppExport SEXP loglike_hmm(void *, void *, void *, void *, void *, void *); -RcppExport SEXP pairwise_rf_estimation_disc(void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP pairwise_rf_estimation_disc_rcpp(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP pairwise_rf_estimation_prob(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP poly_hmm_est_CPP(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP calc_genoprob(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP calc_genoprob_prior(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP calc_genprob_haplo(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP calc_genprob_haplo_highprec(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP calc_genprob_single_parent(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP est_haplotype_map(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP est_haplotype_map_highprec(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP est_hmm_map_single_parent(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP est_map_hmm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP est_map_hmm_highprec(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP get_counts_single_parent_cpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP loglike_hmm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP pairwise_rf_estimation_disc(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP pairwise_rf_estimation_disc_rcpp(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP pairwise_rf_estimation_prob(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +RcppExport SEXP poly_hmm_est_CPP(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"_mappoly_vcf_get_probabilities", (DL_FUNC) &_mappoly_vcf_get_probabilities, 2}, diff --git a/src/read_mappoly_vcf.cpp b/src/read_mappoly_vcf.cpp index c38da12f..8cda7a3e 100755 --- a/src/read_mappoly_vcf.cpp +++ b/src/read_mappoly_vcf.cpp @@ -255,7 +255,6 @@ std::vector get_probabilities(std::string mystring, int pl_pos){ } // Getting PL for all markers and all individuals -//' @export // [[Rcpp::export(name=".vcf_get_probabilities")]] Rcpp::List vcf_get_probabilities(Rcpp::StringMatrix& mat, int pl_pos){ int rowmat = mat.nrow(); @@ -292,7 +291,6 @@ Rcpp::List vcf_get_probabilities(Rcpp::StringMatrix& mat, int pl_pos){ } // Getting dosages for all markers and all individuals -//' @export // [[Rcpp::export(name=".vcf_transform_dosage")]] Rcpp::NumericMatrix vcf_transform_dosage(Rcpp::StringMatrix& mat, int gt_pos){ int rowmat = mat.nrow(); @@ -309,7 +307,6 @@ Rcpp::NumericMatrix vcf_transform_dosage(Rcpp::StringMatrix& mat, int gt_pos){ } // Getting ploidy for all markers and all individuals -//' @export // [[Rcpp::export(name=".vcf_get_ploidy")]] Rcpp::NumericMatrix vcf_get_ploidy(Rcpp::StringMatrix& mat, int gt_pos){ int rowmat = mat.nrow(); @@ -326,7 +323,6 @@ Rcpp::NumericMatrix vcf_get_ploidy(Rcpp::StringMatrix& mat, int gt_pos){ } // Getting depth for all markers and all individuals -//' @export // [[Rcpp::export(name=".vcf_get_depth")]] Rcpp::NumericMatrix vcf_get_depth(Rcpp::StringMatrix& mat, int dp_pos){ int rowmat = mat.nrow();