diff --git a/MAPpoly.Rproj b/MAPpoly.Rproj index 67a3ef33..9b14df21 100755 --- a/MAPpoly.Rproj +++ b/MAPpoly.Rproj @@ -1,20 +1,20 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: knitr -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source --resave-data -PackageBuildArgs: --resave-data -PackageCheckArgs: --no-vignettes --as-cran --install-args=--build -PackageRoxygenize: rd,collate,namespace,vignette +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: knitr +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source --resave-data +PackageBuildArgs: --resave-data +PackageCheckArgs: --no-vignettes --as-cran --install-args=--build +PackageRoxygenize: rd,collate,namespace,vignette diff --git a/NAMESPACE b/NAMESPACE index 5d7b52b9..e69bc9b1 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -237,6 +237,7 @@ importFrom(stats,dhyper) importFrom(stats,hclust) importFrom(stats,lm) importFrom(stats,na.omit) +importFrom(stats,prcomp) importFrom(stats,predict) importFrom(stats,quantile) importFrom(stats,rect.hclust) diff --git a/R/RcppExports.R b/R/RcppExports.R index a0d59d91..78f35b01 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,23 +1,23 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#' @export -.vcf_get_probabilities <- function(mat, pl_pos) { - .Call('_mappoly_vcf_get_probabilities', PACKAGE = 'mappoly', mat, pl_pos) -} - -#' @export -.vcf_transform_dosage <- function(mat, gt_pos) { - .Call('_mappoly_vcf_transform_dosage', PACKAGE = 'mappoly', mat, gt_pos) -} - -#' @export -.vcf_get_ploidy <- function(mat, gt_pos) { - .Call('_mappoly_vcf_get_ploidy', PACKAGE = 'mappoly', mat, gt_pos) -} - -#' @export -.vcf_get_depth <- function(mat, dp_pos) { - .Call('_mappoly_vcf_get_depth', PACKAGE = 'mappoly', mat, dp_pos) -} - +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#' @export +.vcf_get_probabilities <- function(mat, pl_pos) { + .Call('_mappoly_vcf_get_probabilities', PACKAGE = 'mappoly', mat, pl_pos) +} + +#' @export +.vcf_transform_dosage <- function(mat, gt_pos) { + .Call('_mappoly_vcf_transform_dosage', PACKAGE = 'mappoly', mat, gt_pos) +} + +#' @export +.vcf_get_ploidy <- function(mat, gt_pos) { + .Call('_mappoly_vcf_get_ploidy', PACKAGE = 'mappoly', mat, gt_pos) +} + +#' @export +.vcf_get_depth <- function(mat, dp_pos) { + .Call('_mappoly_vcf_get_depth', PACKAGE = 'mappoly', mat, dp_pos) +} + diff --git a/R/filters.R b/R/filters.R index fc2881a6..26efc804 100644 --- a/R/filters.R +++ b/R/filters.R @@ -1,653 +1,709 @@ -#' Filter non-conforming classes in F1, non double reduced population. -#' -#' @param void internal function to be documented -#' @keywords internal -#' @export -filter_non_conforming_classes <- function(input.data, prob.thres = NULL) -{ - if (!inherits(input.data, "mappoly.data")) { - stop(deparse(substitute(input.data)), " is not an object of class 'mappoly.data'") - } - ploidy <- input.data$ploidy - dp <- input.data$dosage.p1 - dq <- input.data$dosage.p2 - Ds <- array(NA, dim = c(ploidy+1, ploidy+1, ploidy+1)) - for(i in 0:ploidy) - for(j in 0:ploidy) - Ds[i+1,j+1,] <- segreg_poly(ploidy = ploidy, dP = i, dQ = j) - Dpop <- cbind(dp,dq) - #Gathering segregation probabilities given parental dosages - M <- t(apply(Dpop, 1, function(x) Ds[x[1]+1, x[2]+1,])) - M[M != 0] <- 1 - dimnames(M) <- list(input.data$mrk.names, 0:ploidy) - ##if no prior probabilities - if(!is.prob.data(input.data)){ - for(i in 1:nrow(M)){ - id0 <- !as.numeric(input.data$geno.dose[i,])%in%(which(M[i,] == 1)-1) - if(any(id0)) - input.data$geno.dose[i,id0] <- (ploidy+1) - } - return(input.data) - } - ## 1 represents conforming classes/ 0 represents non-conforming classes - dp <- rep(dp, input.data$n.ind) - dq <- rep(dq, input.data$n.ind) - M <- M[rep(seq_len(nrow(M)), input.data$n.ind),] - R <- input.data$geno[,-c(1:2)] - input.data$geno[,-c(1:2)]*M - id1 <- apply(R, 1, function(x) abs(sum(x))) > 0.3 # if the sum of the excluded classes is greater than 0.3, use segreg_poly - N <- matrix(NA, sum(id1), input.data$ploidy+1) - ct <- 1 - for(i in which(id1)){ - N[ct,] <- Ds[dp[i]+1, dq[i]+1, ] - ct <- ct+1 - } - input.data$geno[id1,-c(1:2)] <- N - # if the sum of the excluded classes is greater than zero - # and smaller than 0.3, assign zero to those classes and normalize the vector - input.data$geno[,-c(1:2)][R > 0] <- 0 - input.data$geno[,-c(1:2)] <- sweep(input.data$geno[,-c(1:2)], 1, rowSums(input.data$geno[,-c(1:2)]), FUN = "/") - if(is.null(prob.thres)) - prob.thres <- input.data$prob.thres - geno.dose <- dist_prob_to_class(geno = input.data$geno, prob.thres = prob.thres) - if(geno.dose$flag) - { - input.data$geno <- geno.dose$geno - input.data$geno.dose <- geno.dose$geno.dose - } else { - input.data$geno.dose <- geno.dose$geno.dose - } - input.data$geno.dose[is.na(input.data$geno.dose)] <- ploidy + 1 - input.data$n.ind <- ncol(input.data$geno.dose) - input.data$ind.names <- colnames(input.data$geno.dose) - return(input.data) -} - -#' Filter missing genotypes -#' -#' Excludes markers or individuals based on their proportion of missing data -#' -#' @param input.data an object of class \code{mappoly.data} -#' -#' @param type one of the following options: -#' \code{'marker'}{filter out markers based on their percentage of missing data (default)} -#' \code{'individual'}{filter out individuals based on their percentage of missing data} -#' Please notice that removing individuals with certain amount of data can change some marker parameters -#' (such as depth), and can also change the estimated genotypes for other individuals. -#' So be careful when removing individuals. -#' -#' @param filter.thres maximum percentage of missing data (default = 0.2) -#' -#' @param inter if \code{TRUE}, expects user-input to proceed with filtering -#' -#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} -#' @examples -#' plot(tetra.solcap) -#' dat.filt.mrk <- filter_missing(input.data = tetra.solcap, -#' type = "marker", -#' filter.thres = 0.1, -#' inter = TRUE) -#' plot(dat.filt.mrk) -#' @export -#' @importFrom magrittr "%>%" -#' @importFrom dplyr filter -#' @importFrom graphics axis -filter_missing <- function(input.data, - type = c("marker", "individual"), - filter.thres = 0.2, - inter = TRUE) -{ - if (!inherits(input.data, "mappoly.data")) { - stop(deparse(substitute(input.data)), " is not an object of class 'mappoly.data'") - } - type <- match.arg(type) - switch(type, - marker = filter_missing_mrk(input.data, - filter.thres = filter.thres, - inter = inter), - individual = filter_missing_ind(input.data, - filter.thres = filter.thres, - inter = inter) - ) -} - -#' Filter markers based on missing genotypes -#' -#' @param input.data an object of class \code{"mappoly.data"} -#' @param filter.thres maximum percentage of missing data -#' @param inter if \code{TRUE}, expects user-input to proceed with filtering -#' @keywords internal -filter_missing_mrk <- function(input.data, filter.thres = 0.2, inter = TRUE) -{ - op <- par(pty="s") - on.exit(par(op)) - ANSWER <- "flag" - if(interactive() && inter) - { - while(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") - { - na.num <- apply(input.data$geno.dose, 1, function(x,ploidy) sum(x == ploidy+1), ploidy = input.data$ploidy) - perc.na <- na.num/input.data$n.ind - x <- sort(perc.na) - plot(x, - xlab = "markers", - ylab = "frequency of missing data", - col = ifelse(x <= filter.thres, 4, 2), - pch =ifelse(x <= filter.thres, 1, 4)) - abline(h = filter.thres, lty = 2) - f<-paste0("Filtered out: ", sum(perc.na > filter.thres)) - i<-paste0("Included: ", sum(perc.na <= filter.thres)) - legend("topleft", c(f, i) , col = c(2,4), pch = c(4,1)) - ANSWER <- readline("Enter 'Y/n' to proceed or update the filter threshold: ") - if(substr(ANSWER, 1, 1) == "n" | substr(ANSWER, 1, 1) == "no" | substr(ANSWER, 1, 1) == "N") - stop("Stop function.") - if(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") - filter.thres <- as.numeric(ANSWER) - } - mrks.id <- which(perc.na <= filter.thres) - if(length(mrks.id) == input.data$n.mrk){ - return(input.data) - } - out.dat <- sample_data(input.data, type = "markers", selected.mrk = names(mrks.id)) - return(out.dat) - } - else{ - na.num <- apply(input.data$geno.dose, 1, function(x,ploidy) sum(x == ploidy+1), ploidy = input.data$ploidy) - perc.na <- na.num/input.data$n.ind - mrks.id <- which(perc.na <= filter.thres) - if(length(mrks.id) == input.data$n.mrk){ - return(input.data) - } - out.dat <- sample_data(input.data, type = "markers", selected.mrk = names(mrks.id)) - return(out.dat) - } -} - -#' Filter individuals based on missing genotypes -#' -#' @param input.data an object of class \code{"mappoly.data"} -#' @param filter.thres maximum percentage of missing data -#' @param inter if \code{TRUE}, expects user-input to proceed with filtering -#' @keywords internal -#' @importFrom magrittr "%>%" -#' @importFrom dplyr filter -#' @importFrom graphics axis -filter_missing_ind <- function(input.data, filter.thres = 0.2, inter = TRUE) -{ - op <- par(pty="s") - on.exit(par(op)) - ANSWER <- "flag" - ind <- NULL - if(interactive() && inter) - { - while(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") - { - na.num <- apply(input.data$geno.dose, 2, function(x,ploidy) sum(x == ploidy+1), ploidy = input.data$ploidy) - perc.na <- na.num/input.data$n.mrk - x <- sort(perc.na) - plot(x, - xlab = "offspring", - ylab = "frequency of missing data", - col = ifelse(x <= filter.thres, 4, 2), - pch =ifelse(x <= filter.thres, 1, 4)); - abline(h = filter.thres, lty = 2) - f<-paste0("Filtered out: ", sum(perc.na > filter.thres)) - i<-paste0("Included: ", sum(perc.na <= filter.thres)) - legend("topleft", c(f, i) , col = c(2,4), pch = c(4,1)) - ANSWER <- readline("Enter 'Y/n' to proceed or update the filter threshold: ") - if(substr(ANSWER, 1, 1) == "n" | substr(ANSWER, 1, 1) == "no" | substr(ANSWER, 1, 1) == "N") - stop("You decided to stop the function.") - if(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") - filter.thres <- as.numeric(ANSWER) - } - ind.id <- which(perc.na <= filter.thres) - if(length(ind.id) == input.data$n.ind){ - return(input.data) - } - ind <- names(ind.id) - out.dat <- sample_data(input.data, type = "individual", selected.ind = names(ind.id)) - return(out.dat) - } - else{ - na.num <- apply(input.data$geno.dose, 2, function(x,ploidy) sum(x == ploidy+1), ploidy = input.data$ploidy) - perc.na <- na.num/input.data$n.mrk - ind.id <- which(perc.na <= filter.thres) - if(length(ind.id) == input.data$n.ind){ - return(input.data) - } - ind <- names(ind.id) - out.dat <- sample_data(input.data, type = "individual", selected.ind = names(ind.id)) - return(out.dat) - } -} - -#' Filter markers based on chi-square test -#' -#' This function filter markers based on p-values of a chi-square test. -#' The chi-square test assumes that markers follow the expected segregation -#' patterns under Mendelian inheritance, random chromosome bivalent -#' pairing and no double reduction. -#' -#' @param input.obj name of input object (class \code{mappoly.data}) -#' -#' @param chisq.pval.thres p-value threshold used for chi-square tests -#' (default = Bonferroni aproximation with global alpha of 0.05, i.e., -#' 0.05/n.mrk) -#' -#' @param inter if TRUE (default), plots distorted vs. non-distorted markers -#' -#' @return An object of class \code{mappoly.chitest.seq} which contains a list with the following components: -#' \item{keep}{markers that follow Mendelian segregation pattern} -#' \item{exclude}{markers with distorted segregation} -#' \item{chisq.pval.thres}{threshold p-value used for chi-square tests} -#' \item{data.name}{input dataset used to perform the chi-square tests} -#' -#'@examples -#' mrks.chi.filt <- filter_segregation(input.obj = tetra.solcap, -#' chisq.pval.thres = 0.05/tetra.solcap$n.mrk, -#' inter = TRUE) -#' seq.init <- make_seq_mappoly(mrks.chi.filt) -#' -#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} -#' -#' @importFrom graphics axis -#' @export -filter_segregation <- function(input.obj, chisq.pval.thres = NULL, inter = TRUE){ - op <- par(pty="s") - on.exit(par(op)) - if(inherits(input.obj, c("mappoly.data"))){ - chisq.val <- input.obj$chisq.pval - n.mrk <- input.obj$n.mrk - data.name <- as.character(sys.call())[2] - } else if (inherits(input.obj, c("mappoly.sequence"))){ - chisq.val <- input.obj$chisq.pval[input.obj$seq.mrk.names] - n.mrk <- length(input.obj$seq.num) - data.name <- input.obj$data.name - } else { - stop(deparse(substitute(input.obj)), - " is not an object of class 'mappoly.data' or 'mappoly.sequence'") - } - ##Bonferroni approx - if(is.null(chisq.pval.thres)) - chisq.pval.thres <- 0.05/n.mrk - ANSWER <- "flag" - - if(interactive() && inter) - { - while(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") - { - x <- log10(sort(chisq.val, decreasing = TRUE)) - th <- log10(chisq.pval.thres) - plot(x, - xlab = "markers", - ylab = bquote(log[10](P)), - col = ifelse(x <= th, 2, 4), - pch =ifelse(x <= th, 4, 1)) - abline(h = th, lty = 2) - f<-paste0("Filtered out: ", sum(x < th)) - i<-paste0("Included: ", sum(x >= th)) - legend("bottomleft", c(f, i) , col = c(2,4), pch = c(4,1)) - ANSWER <- readline("Enter 'Y/n' to proceed or update the p value threshold: ") - if(substr(ANSWER, 1, 1) == "n" | substr(ANSWER, 1, 1) == "no" | substr(ANSWER, 1, 1) == "N") - stop("You decided to stop the function.") - if(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") - chisq.pval.thres <- as.numeric(ANSWER) - } - } - keep <- names(which(chisq.val >= chisq.pval.thres)) - exclude <- names(which(chisq.val < chisq.pval.thres)) - structure(list(keep = keep, exclude = exclude, chisq.pval.thres = chisq.pval.thres, - data.name = data.name), - class = "mappoly.chitest.seq") -} - -#' Filter out individuals -#' -#' This function removes individuals from the data set. Individuals can be -#' user-defined or can be accessed via interactive kinship analysis. -#' -#' @param input.data name of input object (class \code{mappoly.data}) -#' -#' @param ind.to.remove individuals to be removed. If \code{NULL} it opens -#' an interactive graphic to proceed with the individual -#' selection -#' @param inter if \code{TRUE}, expects user-input to proceed with filtering -#' -#' @param verbose if \code{TRUE} (default), shows the filtered out individuals -#' -#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} -#' -#' @export -#' -filter_individuals <- function(input.data, ind.to.remove = NULL, inter = TRUE, verbose = TRUE){ - if (!inherits(input.data, "mappoly.data")) { - stop(deparse(substitute(input.data)), " is not an object of class 'mappoly.data'") - } - op <- par(pty="s") - on.exit(par(op)) - # if (!require(AGHmatrix)) - # stop("Please install package 'AGHmatrix' to proceed") - D <- t(input.data$geno.dose) - D[D == input.data$ploidy+1] <- NA - D <- rbind(input.data$dosage.p1, input.data$dosage.p2, D) - rownames(D)[1:2] <- c("P1", "P2") - G <- AGHmatrix::Gmatrix(D, method = "VanRaden",ploidy = input.data$ploidy) - x <- G[1,] - y <- G[2,] - #a <- 2*atan(y/x)/pi - #b <- sqrt(x^2 + y^2) - df <- data.frame(x = x, y = y, type = c(2, 2, rep(4, length(x)-2))) - plot(df[,1:2], col = df$type, pch = 19, - xlab = "relationships between the offspring and P1", - ylab = "relationships between the offspring and P2") - abline(c(0,1), lty = 2) - abline(c(-0.4,1), lty = 2, col = "gray") - abline(c(0.4,1), lty = 2, col = "gray") - legend("topright", c("Parents", "Offspring") , col = c(2,4), pch = 19) - if(!is.null(ind.to.remove)){ - out.data <- sample_data(input.data, selected.ind = setdiff(input.data$ind.names, ind.to.remove)) - return(out.data) - } - if(interactive() && inter) - { - # if (!require(gatepoints)) - # stop("Please install package 'gatepoints' to proceed") - ANSWER <- readline("Enter 'Y/n' to proceed with interactive filtering or quit: ") - if(substr(ANSWER, 1, 1) == "y" | substr(ANSWER, 1, 1) == "yes" | substr(ANSWER, 1, 1) == "Y" | ANSWER == "") - { - ind.to.remove <- gatepoints::fhs(df, mark = TRUE) - ind.to.remove <- setdiff(ind.to.remove, c("P1", "P2")) - ind.to.include <- setdiff(rownames(df)[-c(1:2)], ind.to.remove) - if(verbose){ - cat("Removing individual(s): \n") - print(ind.to.remove) - cat("...\n") - } - if(length(ind.to.remove) == 0){ - warning("No individuals removed. Returning original data set.") - return(input.data) - } - out.data <- sample_data(input.data, selected.ind = ind.to.include) - return(out.data) - } else{ - warning("No individuals removed. Returning original data set.") - return(input.data) - } - } -} -#' Remove markers that do not meet a LOD criteria -#' -#' Remove markers that do not meet a LOD and recombination fraction -#' criteria for at least a percentage of the pairwise marker -#' combinations. It also removes markers with strong evidence of -#' linkage across the whole linkage group (false positive). -#' -#' \code{thresh.LOD.ph} should be set in order to only select -#' recombination fractions that have LOD scores associated to the -#' linkage phase configuration higher than \code{thresh_LOD_ph} -#' when compared to the second most likely linkage phase configuration. -#' That action usually eliminates markers that are unlinked to the -#' set of analyzed markers. -#' -#' @param input.twopt an object of class \code{mappoly.twopt} -#' -#' @param thresh.LOD.ph LOD score threshold for linkage phase configuration -#' (default = 5) -#' -#' @param thresh.LOD.rf LOD score threshold for recombination fraction -#' (default = 5) -#' -#' @param thresh.rf threshold for recombination fractions (default = 0.15) -#' -#' @param probs indicates the probability corresponding to the filtering -#' quantiles. (default = c(0.05, 1)) -#' -#' @param diag.markers A window where marker pairs should be considered. -#' If NULL (default), all markers are considered. -#' -#' @param mrk.order marker order. Only has effect if 'diag.markers' is not NULL -#' -#' @param ncpus number of parallel processes (i.e. cores) to spawn -#' (default = 1) -#' -#' @param diagnostic.plot if \code{TRUE} produces a diagnostic plot -#' -#' @param breaks number of cells for the histogram -#' -#' @return A filtered object of class \code{mappoly.sequence}. -#' See \code{\link[mappoly]{make_seq_mappoly}} for details -#' -#' @examples -#' all.mrk <- make_seq_mappoly(hexafake, 1:20) -#' red.mrk <- elim_redundant(all.mrk) -#' unique.mrks <- make_seq_mappoly(red.mrk) -#' all.pairs <- est_pairwise_rf(input.seq = unique.mrks, -#' ncpus = 1, -#' verbose = TRUE) -#' -#' ## Full recombination fraction matrix -#' mat.full <- rf_list_to_matrix(input.twopt = all.pairs) -#' plot(mat.full) -#' -#' ## Removing disruptive SNPs -#' tpt.filt <- rf_snp_filter(all.pairs, 2, 2, 0.07, probs = c(0.15, 1)) -#' p1.filt <- make_pairs_mappoly(input.seq = tpt.filt, input.twopt = all.pairs) -#' m1.filt <- rf_list_to_matrix(input.twopt = p1.filt) -#' plot(mat.full, main.text = "LG1") -#' plot(m1.filt, main.text = "LG1.filt") -#' -#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} with updates by Gabriel Gesteira, \email{gdesiqu@ncsu.edu} -#' -#' @references -#' Mollinari, M., and Garcia, A. A. F. (2019) Linkage -#' analysis and haplotype phasing in experimental autopolyploid -#' populations with high ploidy level using hidden Markov -#' models, _G3: Genes, Genomes, Genetics_. -#' \doi{10.1534/g3.119.400378} -#' -#' @export rf_snp_filter -#' @importFrom ggplot2 ggplot geom_histogram aes scale_fill_manual xlab ggtitle -#' @importFrom graphics hist -rf_snp_filter <- function(input.twopt, - thresh.LOD.ph = 5, - thresh.LOD.rf = 5, - thresh.rf = 0.15, - probs = c(0.05, 1), - diag.markers = NULL, - mrk.order = NULL, - ncpus = 1L, - diagnostic.plot = TRUE, - breaks = 100) -{ - - input_classes <- c("mappoly.twopt", "mappoly.twopt2") - if (!inherits(input.twopt, input_classes)) { - stop(deparse(substitute(input.twopt)), paste0(" is not an object of class ", paste0(input_classes, collapse = " or "))) - } - probs <- range(probs) - ## Getting filtered rf matrix - rf_mat <- rf_list_to_matrix(input.twopt = input.twopt, thresh.LOD.ph = thresh.LOD.ph, - thresh.LOD.rf = thresh.LOD.rf, thresh.rf = thresh.rf, - ncpus = ncpus, verbose = FALSE) - M <- rf_mat$rec.mat - if(!is.null(mrk.order)) - M <- M[mrk.order, mrk.order] - if(!is.null(diag.markers)) - M[abs(col(M) - row(M)) > diag.markers] <- NA - x <- apply(M, 1, function(x) sum(!is.na(x))) - w <- hist(x, breaks = breaks, plot = FALSE) - th <- quantile(x, probs = probs) - rem <- c(which(x < th[1]), which(x > th[2])) - ids <- names(which(x >= th[1] & x <= th[2])) - value <- type <- NULL - if(diagnostic.plot){ - d <- rbind(data.frame(type = "original", value = x), - data.frame(type = "filtered", value = x[ids])) - p <- ggplot2::ggplot(d, ggplot2::aes(value)) + - ggplot2::geom_histogram(ggplot2::aes(fill = type), - alpha = 0.4, position = "identity", binwidth = diff(w$mids)[1]) + - ggplot2::scale_fill_manual(values = c("#00AFBB", "#E7B800")) + - ggplot2::ggtitle( paste0("Filtering probs: [", probs[1], " : ", probs[2], "] - Non NA values by row in rf matrix - b width: ", diff(w$mids)[1])) + - ggplot2::xlab(paste0("Non 'NA' values at LOD.ph = ", thresh.LOD.ph, ", LOD.rf = ", thresh.LOD.rf, ", and thresh.rf = ", thresh.rf)) - print(p) - } - ## Returning sequence object - ch_filt <- make_seq_mappoly(input.obj = get(input.twopt$data.name, pos = 1), arg = ids, data.name = input.twopt$data.name) - return(ch_filt) -} - -#' Edit sequence ordered by reference genome positions -#' comparing to another set order -#' -#' @param input.seq object of class mappoly.sequence with alternative order (not genomic order) -#' @param invert vector of marker names to be inverted -#' @param remove vector of marker names to be removed -#' -#' @author Cristiane Taniguti, \email{chtaniguti@tamu.edu} -#' -#' @examples -#' \donttest{ -#' dat <- filter_segregation(tetra.solcap, inter = FALSE) -#' seq_dat <- make_seq_mappoly(dat) -#' seq_chr <- make_seq_mappoly(seq_dat, arg = seq_dat$seq.mrk.names[which(seq_dat$chrom=="1")]) -#' -#' tpt <- est_pairwise_rf(seq_chr) -#' seq.filt <- rf_snp_filter(tpt, probs = c(0.05, 0.95)) -#' mat <- rf_list_to_matrix(tpt) -#' mat2 <- make_mat_mappoly(mat, seq.filt) -#' -#' seq_test_mds <- mds_mappoly(mat2) -#' seq_mds <- make_seq_mappoly(seq_test_mds) -#' edit_seq <- edit_order(input.seq = seq_mds) -#' } -#' -#' @return object of class \code{mappoly.edit.order}: a list containing -#' vector of marker names ordered according to editions (`edited_order`); -#' vector of removed markers names (`removed`); -#' vector of inverted markers names (`inverted`). -#' -#' @export -edit_order <- function(input.seq, invert= NULL, remove = NULL){ - - if (!inherits(input.seq, "mappoly.sequence")) { - stop(deparse(substitute(input.seq)), " is not an object of class 'mappoly.sequence'") - } - - get_weird <- data.frame(x = 1:length(input.seq$genome.pos), - y = input.seq$genome.pos) - - rownames(get_weird) <- input.seq$seq.mrk.names - get_weird <- get_weird[order(get_weird$y),] - plot(get_weird$x, get_weird$y, xlab="input sequence order", ylab = "genomic position (bp)") - - inverted <- removed <- vector() - if(!is.null(invert) | !is.null(remove)){ - if(!is.null(invert)){ - inverted <- c(inverted, as.vector(invert)) - repl <- get_weird[rev(match(as.vector(invert),rownames(get_weird))),] - get_weird[match(as.vector(invert),rownames(get_weird)),2] <- repl[,2] - rownames(get_weird)[match(as.vector(invert), rownames(get_weird))] <- rownames(repl) - } - if(!is.null(remove)){ - removed <- c(removed, as.vector(remove)) - get_weird <- get_weird[-match(remove, rownames(get_weird)),] - } - plot(get_weird$x, get_weird$y, xlab="input sequence order", ylab = "genomic position (bp)") - } else { - cat("Mark at least three points on the plot and press `Esc` to continue.") - if(interactive()){ - ANSWER <- "Y" - while(substr(ANSWER, 1, 1) == "y" | substr(ANSWER, 1, 1) == "yes" | substr(ANSWER, 1, 1) == "Y" | ANSWER == ""){ - plot(get_weird$x, get_weird$y, xlab="input sequence order", ylab = "genomic position (bp)") - mks.to.remove <- gatepoints::fhs(get_weird, mark = TRUE) - if(length(which(rownames(get_weird) %in% mks.to.remove)) > 0){ - ANSWER2 <- readline("Enter 'invert/remove' to proceed with the edition: ") - if(ANSWER2 == "invert"){ - inverted <- c(inverted, as.vector(mks.to.remove)) - repl <- get_weird[rev(match(as.vector(mks.to.remove),rownames(get_weird))),] - get_weird[match(as.vector(mks.to.remove),rownames(get_weird)),2] <- repl[,2] - rownames(get_weird)[match(as.vector(mks.to.remove), rownames(get_weird))] <- rownames(repl) - } else { - removed <- c(removed, as.vector(mks.to.remove)) - get_weird <- get_weird[-match(mks.to.remove, rownames(get_weird)),] - } - } - ANSWER <- readline("Enter 'Y/n' to proceed with interactive edition or quit: ") - } - plot(get_weird$x, get_weird$y, xlab="input sequence order", ylab = "genomic position (bp)") - } - } - - return(structure(list(edited_order = rownames(get_weird), - removed = removed, - inverted = inverted, - data.name = input.seq$data.name), class = "mappoly.edit.order")) -} - -#' Filter aneuploid chromosomes from progeny individuals -#' -#' @param input.data name of input object (class \code{mappoly.data}) -#' -#' @param aneuploid.info data.frame with ploidy information by chromosome (columns) for each individual -#' in progeny (rows). The chromosome and individuals names must match the ones in the file used as input -#' in mappoly. -#' -#' @param ploidy main ploidy -#' -#' @param rm_missing remove also genotype information from chromosomes with missing data (NA) in the aneuploid.info file -#' -#' @examples -#' aneuploid.info <- matrix(4, nrow=tetra.solcap$n.ind, ncol = 12) -#' set.seed(8080) -#' aneuploid.info[sample(1:length(aneuploid.info), round((4*length(aneuploid.info))/100),0)] <- 3 -#' aneuploid.info[sample(1:length(aneuploid.info), round((4*length(aneuploid.info))/100),0)] <- 5 -#' -#' colnames(aneuploid.info) <- paste0(1:12) -#' aneuploid.info <- cbind(inds = tetra.solcap$ind.names, aneuploid.info) -#' -#' filt.dat <- filter_aneuploid(input.data = tetra.solcap, -#' aneuploid.info = aneuploid.info, ploidy = 4) -#' -#' @author Cristiane Taniguti, \email{chtaniguti@tamu.edu} -#' -#' @return object of class \code{mappoly.data} -#' -#' @export -filter_aneuploid <- function(input.data, aneuploid.info, ploidy, rm_missing = TRUE){ - - if (!inherits(input.data, "mappoly.data")) { - stop(deparse(substitute(input.data)), " is not an object of class 'mappoly.data'") - } - - aneu.chroms <- colnames(aneuploid.info)[-1] - - keep.ind <- match(input.data$ind.names, aneuploid.info[,1]) - - if(length(keep.ind) != length(input.data$ind.names) | any(is.na(keep.ind))) - stop("The aneuploid information ploidy is missing at least one sample.") - - aneuploid.info.filt <- aneuploid.info[keep.ind,] - - idx.list <- apply(aneuploid.info.filt[,-1], 1, function(x) which(x != ploidy)) - names(idx.list) <- aneuploid.info.filt[,1] - - idx.list <- idx.list[-which(!sapply(idx.list, function(x) length(x) > 0))] - - if(rm_missing){ - idx.list.na <- apply(aneuploid.info.filt[,-1], 1, function(x) which(is.na(x))) - if(length(idx.list.na) > 0){ - names(idx.list.na) <- aneuploid.info.filt[,1] - idx.list.na <- idx.list.na[-which(!sapply(idx.list.na, function(x) length(x) > 0))] - idx.list <- c(idx.list, idx.list.na) - } - cat(round((length(unlist(idx.list))/(dim(aneuploid.info.filt)[1]*(dim(aneuploid.info.filt)[2]-1)))*100,2), "% of the chromosomes x individuals are aneuploids or has missing ploidy information\n") - } else - cat(round((length(unlist(idx.list))/(dim(aneuploid.info.filt)[1]*(dim(aneuploid.info.filt)[2]-1)))*100,2), "% of the chromosomes x individuals are aneuploids\n") - - for(i in 1:length(idx.list)){ - idx <- which(input.data$chrom %in% names(idx.list[[i]])) - input.data$geno.dose[idx,names(idx.list[i])] <- ploidy + 1 - } - - return(input.data) -} - +#' Filter non-conforming classes in F1, non double reduced population. +#' +#' @param void internal function to be documented +#' @keywords internal +#' @export +filter_non_conforming_classes <- function(input.data, prob.thres = NULL) +{ + if (!inherits(input.data, "mappoly.data")) { + stop(deparse(substitute(input.data)), " is not an object of class 'mappoly.data'") + } + ploidy <- input.data$ploidy + dp <- input.data$dosage.p1 + dq <- input.data$dosage.p2 + Ds <- array(NA, dim = c(ploidy+1, ploidy+1, ploidy+1)) + for(i in 0:ploidy) + for(j in 0:ploidy) + Ds[i+1,j+1,] <- segreg_poly(ploidy = ploidy, dP = i, dQ = j) + Dpop <- cbind(dp,dq) + #Gathering segregation probabilities given parental dosages + M <- t(apply(Dpop, 1, function(x) Ds[x[1]+1, x[2]+1,])) + M[M != 0] <- 1 + dimnames(M) <- list(input.data$mrk.names, 0:ploidy) + ##if no prior probabilities + if(!is.prob.data(input.data)){ + for(i in 1:nrow(M)){ + id0 <- !as.numeric(input.data$geno.dose[i,])%in%(which(M[i,] == 1)-1) + if(any(id0)) + input.data$geno.dose[i,id0] <- (ploidy+1) + } + return(input.data) + } + ## 1 represents conforming classes/ 0 represents non-conforming classes + dp <- rep(dp, input.data$n.ind) + dq <- rep(dq, input.data$n.ind) + M <- M[rep(seq_len(nrow(M)), input.data$n.ind),] + R <- input.data$geno[,-c(1:2)] - input.data$geno[,-c(1:2)]*M + id1 <- apply(R, 1, function(x) abs(sum(x))) > 0.3 # if the sum of the excluded classes is greater than 0.3, use segreg_poly + N <- matrix(NA, sum(id1), input.data$ploidy+1) + ct <- 1 + for(i in which(id1)){ + N[ct,] <- Ds[dp[i]+1, dq[i]+1, ] + ct <- ct+1 + } + input.data$geno[id1,-c(1:2)] <- N + # if the sum of the excluded classes is greater than zero + # and smaller than 0.3, assign zero to those classes and normalize the vector + input.data$geno[,-c(1:2)][R > 0] <- 0 + input.data$geno[,-c(1:2)] <- sweep(input.data$geno[,-c(1:2)], 1, rowSums(input.data$geno[,-c(1:2)]), FUN = "/") + if(is.null(prob.thres)) + prob.thres <- input.data$prob.thres + geno.dose <- dist_prob_to_class(geno = input.data$geno, prob.thres = prob.thres) + if(geno.dose$flag) + { + input.data$geno <- geno.dose$geno + input.data$geno.dose <- geno.dose$geno.dose + } else { + input.data$geno.dose <- geno.dose$geno.dose + } + input.data$geno.dose[is.na(input.data$geno.dose)] <- ploidy + 1 + input.data$n.ind <- ncol(input.data$geno.dose) + input.data$ind.names <- colnames(input.data$geno.dose) + return(input.data) +} + +#' Filter missing genotypes +#' +#' Excludes markers or individuals based on their proportion of missing data +#' +#' @param input.data an object of class \code{mappoly.data} +#' +#' @param type one of the following options: +#' \code{'marker'}{filter out markers based on their percentage of missing data (default)} +#' \code{'individual'}{filter out individuals based on their percentage of missing data} +#' Please notice that removing individuals with certain amount of data can change some marker parameters +#' (such as depth), and can also change the estimated genotypes for other individuals. +#' So be careful when removing individuals. +#' +#' @param filter.thres maximum percentage of missing data (default = 0.2) +#' +#' @param inter if \code{TRUE}, expects user-input to proceed with filtering +#' +#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} +#' @examples +#' plot(tetra.solcap) +#' dat.filt.mrk <- filter_missing(input.data = tetra.solcap, +#' type = "marker", +#' filter.thres = 0.1, +#' inter = TRUE) +#' plot(dat.filt.mrk) +#' @export +#' @importFrom magrittr "%>%" +#' @importFrom dplyr filter +#' @importFrom graphics axis +filter_missing <- function(input.data, + type = c("marker", "individual"), + filter.thres = 0.2, + inter = TRUE) +{ + if (!inherits(input.data, "mappoly.data")) { + stop(deparse(substitute(input.data)), " is not an object of class 'mappoly.data'") + } + type <- match.arg(type) + switch(type, + marker = filter_missing_mrk(input.data, + filter.thres = filter.thres, + inter = inter), + individual = filter_missing_ind(input.data, + filter.thres = filter.thres, + inter = inter) + ) +} + +#' Filter markers based on missing genotypes +#' +#' @param input.data an object of class \code{"mappoly.data"} +#' @param filter.thres maximum percentage of missing data +#' @param inter if \code{TRUE}, expects user-input to proceed with filtering +#' @keywords internal +filter_missing_mrk <- function(input.data, filter.thres = 0.2, inter = TRUE) +{ + op <- par(pty="s") + on.exit(par(op)) + ANSWER <- "flag" + if(interactive() && inter) + { + while(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") + { + na.num <- apply(input.data$geno.dose, 1, function(x,ploidy) sum(x == ploidy+1), ploidy = input.data$ploidy) + perc.na <- na.num/input.data$n.ind + x <- sort(perc.na) + plot(x, + xlab = "markers", + ylab = "frequency of missing data", + col = ifelse(x <= filter.thres, 4, 2), + pch =ifelse(x <= filter.thres, 1, 4)) + abline(h = filter.thres, lty = 2) + f<-paste0("Filtered out: ", sum(perc.na > filter.thres)) + i<-paste0("Included: ", sum(perc.na <= filter.thres)) + legend("topleft", c(f, i) , col = c(2,4), pch = c(4,1)) + ANSWER <- readline("Enter 'Y/n' to proceed or update the filter threshold: ") + if(substr(ANSWER, 1, 1) == "n" | substr(ANSWER, 1, 1) == "no" | substr(ANSWER, 1, 1) == "N") + stop("Stop function.") + if(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") + filter.thres <- as.numeric(ANSWER) + } + mrks.id <- which(perc.na <= filter.thres) + if(length(mrks.id) == input.data$n.mrk){ + return(input.data) + } + out.dat <- sample_data(input.data, type = "markers", selected.mrk = names(mrks.id)) + return(out.dat) + } + else{ + na.num <- apply(input.data$geno.dose, 1, function(x,ploidy) sum(x == ploidy+1), ploidy = input.data$ploidy) + perc.na <- na.num/input.data$n.ind + mrks.id <- which(perc.na <= filter.thres) + if(length(mrks.id) == input.data$n.mrk){ + return(input.data) + } + out.dat <- sample_data(input.data, type = "markers", selected.mrk = names(mrks.id)) + return(out.dat) + } +} + +#' Filter individuals based on missing genotypes +#' +#' @param input.data an object of class \code{"mappoly.data"} +#' @param filter.thres maximum percentage of missing data +#' @param inter if \code{TRUE}, expects user-input to proceed with filtering +#' @keywords internal +#' @importFrom magrittr "%>%" +#' @importFrom dplyr filter +#' @importFrom graphics axis +filter_missing_ind <- function(input.data, filter.thres = 0.2, inter = TRUE) +{ + op <- par(pty="s") + on.exit(par(op)) + ANSWER <- "flag" + ind <- NULL + if(interactive() && inter) + { + while(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") + { + na.num <- apply(input.data$geno.dose, 2, function(x,ploidy) sum(x == ploidy+1), ploidy = input.data$ploidy) + perc.na <- na.num/input.data$n.mrk + x <- sort(perc.na) + plot(x, + xlab = "offspring", + ylab = "frequency of missing data", + col = ifelse(x <= filter.thres, 4, 2), + pch =ifelse(x <= filter.thres, 1, 4)); + abline(h = filter.thres, lty = 2) + f<-paste0("Filtered out: ", sum(perc.na > filter.thres)) + i<-paste0("Included: ", sum(perc.na <= filter.thres)) + legend("topleft", c(f, i) , col = c(2,4), pch = c(4,1)) + ANSWER <- readline("Enter 'Y/n' to proceed or update the filter threshold: ") + if(substr(ANSWER, 1, 1) == "n" | substr(ANSWER, 1, 1) == "no" | substr(ANSWER, 1, 1) == "N") + stop("You decided to stop the function.") + if(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") + filter.thres <- as.numeric(ANSWER) + } + ind.id <- which(perc.na <= filter.thres) + if(length(ind.id) == input.data$n.ind){ + return(input.data) + } + ind <- names(ind.id) + out.dat <- sample_data(input.data, type = "individual", selected.ind = names(ind.id)) + return(out.dat) + } + else{ + na.num <- apply(input.data$geno.dose, 2, function(x,ploidy) sum(x == ploidy+1), ploidy = input.data$ploidy) + perc.na <- na.num/input.data$n.mrk + ind.id <- which(perc.na <= filter.thres) + if(length(ind.id) == input.data$n.ind){ + return(input.data) + } + ind <- names(ind.id) + out.dat <- sample_data(input.data, type = "individual", selected.ind = names(ind.id)) + return(out.dat) + } +} + +#' Filter markers based on chi-square test +#' +#' This function filter markers based on p-values of a chi-square test. +#' The chi-square test assumes that markers follow the expected segregation +#' patterns under Mendelian inheritance, random chromosome bivalent +#' pairing and no double reduction. +#' +#' @param input.obj name of input object (class \code{mappoly.data}) +#' +#' @param chisq.pval.thres p-value threshold used for chi-square tests +#' (default = Bonferroni aproximation with global alpha of 0.05, i.e., +#' 0.05/n.mrk) +#' +#' @param inter if TRUE (default), plots distorted vs. non-distorted markers +#' +#' @return An object of class \code{mappoly.chitest.seq} which contains a list with the following components: +#' \item{keep}{markers that follow Mendelian segregation pattern} +#' \item{exclude}{markers with distorted segregation} +#' \item{chisq.pval.thres}{threshold p-value used for chi-square tests} +#' \item{data.name}{input dataset used to perform the chi-square tests} +#' +#'@examples +#' mrks.chi.filt <- filter_segregation(input.obj = tetra.solcap, +#' chisq.pval.thres = 0.05/tetra.solcap$n.mrk, +#' inter = TRUE) +#' seq.init <- make_seq_mappoly(mrks.chi.filt) +#' +#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} +#' +#' @importFrom graphics axis +#' @export +filter_segregation <- function(input.obj, chisq.pval.thres = NULL, inter = TRUE){ + op <- par(pty="s") + on.exit(par(op)) + if(inherits(input.obj, c("mappoly.data"))){ + chisq.val <- input.obj$chisq.pval + n.mrk <- input.obj$n.mrk + data.name <- as.character(sys.call())[2] + } else if (inherits(input.obj, c("mappoly.sequence"))){ + chisq.val <- input.obj$chisq.pval[input.obj$seq.mrk.names] + n.mrk <- length(input.obj$seq.num) + data.name <- input.obj$data.name + } else { + stop(deparse(substitute(input.obj)), + " is not an object of class 'mappoly.data' or 'mappoly.sequence'") + } + ##Bonferroni approx + if(is.null(chisq.pval.thres)) + chisq.pval.thres <- 0.05/n.mrk + ANSWER <- "flag" + + if(interactive() && inter) + { + while(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") + { + x <- log10(sort(chisq.val, decreasing = TRUE)) + th <- log10(chisq.pval.thres) + plot(x, + xlab = "markers", + ylab = bquote(log[10](P)), + col = ifelse(x <= th, 2, 4), + pch =ifelse(x <= th, 4, 1)) + abline(h = th, lty = 2) + f<-paste0("Filtered out: ", sum(x < th)) + i<-paste0("Included: ", sum(x >= th)) + legend("bottomleft", c(f, i) , col = c(2,4), pch = c(4,1)) + ANSWER <- readline("Enter 'Y/n' to proceed or update the p value threshold: ") + if(substr(ANSWER, 1, 1) == "n" | substr(ANSWER, 1, 1) == "no" | substr(ANSWER, 1, 1) == "N") + stop("You decided to stop the function.") + if(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER != "") + chisq.pval.thres <- as.numeric(ANSWER) + } + } + keep <- names(which(chisq.val >= chisq.pval.thres)) + exclude <- names(which(chisq.val < chisq.pval.thres)) + structure(list(keep = keep, exclude = exclude, chisq.pval.thres = chisq.pval.thres, + data.name = data.name), + class = "mappoly.chitest.seq") +} + +#' Filter out individuals +#' +#' This function removes individuals from the data set. Individuals can be +#' user-defined or can be accessed via interactive kinship analysis. +#' +#' @param input.data name of input object (class \code{mappoly.data}) +#' +#' @param ind.to.remove individuals to be removed. If \code{NULL} it opens +#' an interactive graphic to proceed with the individual +#' selection +#' @param inter if \code{TRUE}, expects user-input to proceed with filtering +#' +#' @param type A character string specifying the procedure to be used for +#' detecting outlier offspring. Options include "Gmat", which +#' utilizes the genomic kinship matrix, and "PCA", which employs +#' principal component analysis on the dosage matrix. +#' +#' coefficient (or covariance) is to be computed. One of "pearson" (default), "kendall", or "spearman": can be abbreviated. +#' +#' @param verbose if \code{TRUE} (default), shows the filtered out individuals +#' +#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} +#' +#' @importFrom stats prcomp +#' @export +filter_individuals <- function(input.data, + ind.to.remove = NULL, + inter = TRUE, + type = c("Gmat", "PCA"), + verbose = TRUE){ + if (!inherits(input.data, "mappoly.data")) { + stop(deparse(substitute(input.data)), " is not an object of class 'mappoly.data'") + } + type <- match.arg(type) + op <- par(pty="s") + on.exit(par(op)) + D <- t(input.data$geno.dose) + D[D == input.data$ploidy+1] <- NA + D <- rbind(input.data$dosage.p1, input.data$dosage.p2, D) + rownames(D)[1:2] <- c("P1", "P2") + if(type == "Gmat"){ + G <- AGHmatrix::Gmatrix(D, method = "VanRaden",ploidy = input.data$ploidy) + x <- G[1,] + y <- G[2,] + df <- data.frame(x = x, y = y, type = c(2, 2, rep(4, length(x)-2))) + plot(df[,1:2], col = df$type, pch = 19, + xlab = "relationships between the offspring and P1", + ylab = "relationships between the offspring and P2") + abline(c(0,1), lty = 2) + abline(c(-0.4,1), lty = 2, col = "gray") + abline(c(0.4,1), lty = 2, col = "gray") + legend("topright", c("Parents", "Offspring") , col = c(2,4), pch = 19) + if(!is.null(ind.to.remove)){ + out.data <- sample_data(input.data, selected.ind = setdiff(input.data$ind.names, ind.to.remove)) + return(out.data) + } + if(interactive() && inter) + { + # if (!require(gatepoints)) + # stop("Please install package 'gatepoints' to proceed") + ANSWER <- readline("Enter 'Y/n' to proceed with interactive filtering or quit: ") + if(substr(ANSWER, 1, 1) == "y" | substr(ANSWER, 1, 1) == "yes" | substr(ANSWER, 1, 1) == "Y" | ANSWER == "") + { + ind.to.remove <- gatepoints::fhs(df, mark = TRUE) + ind.to.remove <- setdiff(ind.to.remove, c("P1", "P2")) + ind.to.include <- setdiff(rownames(df)[-c(1:2)], ind.to.remove) + if(verbose){ + cat("Removing individual(s): \n") + print(ind.to.remove) + cat("...\n") + } + if(length(ind.to.remove) == 0){ + warning("No individuals removed. Returning original data set.") + return(input.data) + } + out.data <- sample_data(input.data, selected.ind = ind.to.include) + return(out.data) + } else{ + warning("No individuals removed. Returning original data set.") + return(input.data) + } + } + } + else{ + row_means <- rowMeans(D, na.rm = TRUE) + for (i in 1:nrow(D)) + D[i, is.na(D[i, ])] <- row_means[i] + pc <- prcomp(D) + x <- pc$x[,"PC1"] + y <- pc$x[,"PC2"] + a <- diff(range(x))*0.05 + b <- diff(range(y))*0.05 + df <- data.frame(x = x, y = y, type = c(2, 2, rep(4, length(x)-2))) + plot(df[,1:2], col = df$type, pch = 19, + xlab = "PC1", + ylab = "PC2", + xlim = c(min(x)-a, max(x)+a), + ylim = c(min(y)-b, max(y)+b)) + points(df[1:2,1:2], col = 2, pch = 19) + legend("bottomleft", c("Parents", "Offspring") , col = c(2,4), pch = 19) + if(!is.null(ind.to.remove)){ + out.data <- sample_data(input.data, selected.ind = setdiff(input.data$ind.names, ind.to.remove)) + return(out.data) + } + if(interactive() && inter) + { + ANSWER <- readline("Enter 'Y/n' to proceed with interactive filtering or quit: ") + if(substr(ANSWER, 1, 1) == "y" | substr(ANSWER, 1, 1) == "yes" | substr(ANSWER, 1, 1) == "Y" | ANSWER == "") + { + ind.to.remove <- gatepoints::fhs(df, mark = TRUE) + ind.to.remove <- setdiff(ind.to.remove, c("P1", "P2")) + ind.to.include <- setdiff(rownames(df)[-c(1:2)], ind.to.remove) + if(verbose){ + cat("Removing individual(s): \n") + print(ind.to.remove) + cat("...\n") + } + if(length(ind.to.remove) == 0){ + warning("No individuals removed. Returning original data set.") + return(input.data) + } + out.data <- sample_data(input.data, selected.ind = ind.to.include) + return(out.data) + } else{ + warning("No individuals removed. Returning original data set.") + return(input.data) + } + } + } +} +#' Remove markers that do not meet a LOD criteria +#' +#' Remove markers that do not meet a LOD and recombination fraction +#' criteria for at least a percentage of the pairwise marker +#' combinations. It also removes markers with strong evidence of +#' linkage across the whole linkage group (false positive). +#' +#' \code{thresh.LOD.ph} should be set in order to only select +#' recombination fractions that have LOD scores associated to the +#' linkage phase configuration higher than \code{thresh_LOD_ph} +#' when compared to the second most likely linkage phase configuration. +#' That action usually eliminates markers that are unlinked to the +#' set of analyzed markers. +#' +#' @param input.twopt an object of class \code{mappoly.twopt} +#' +#' @param thresh.LOD.ph LOD score threshold for linkage phase configuration +#' (default = 5) +#' +#' @param thresh.LOD.rf LOD score threshold for recombination fraction +#' (default = 5) +#' +#' @param thresh.rf threshold for recombination fractions (default = 0.15) +#' +#' @param probs indicates the probability corresponding to the filtering +#' quantiles. (default = c(0.05, 1)) +#' +#' @param diag.markers A window where marker pairs should be considered. +#' If NULL (default), all markers are considered. +#' +#' @param mrk.order marker order. Only has effect if 'diag.markers' is not NULL +#' +#' @param ncpus number of parallel processes (i.e. cores) to spawn +#' (default = 1) +#' +#' @param diagnostic.plot if \code{TRUE} produces a diagnostic plot +#' +#' @param breaks number of cells for the histogram +#' +#' @return A filtered object of class \code{mappoly.sequence}. +#' See \code{\link[mappoly]{make_seq_mappoly}} for details +#' +#' @examples +#' all.mrk <- make_seq_mappoly(hexafake, 1:20) +#' red.mrk <- elim_redundant(all.mrk) +#' unique.mrks <- make_seq_mappoly(red.mrk) +#' all.pairs <- est_pairwise_rf(input.seq = unique.mrks, +#' ncpus = 1, +#' verbose = TRUE) +#' +#' ## Full recombination fraction matrix +#' mat.full <- rf_list_to_matrix(input.twopt = all.pairs) +#' plot(mat.full) +#' +#' ## Removing disruptive SNPs +#' tpt.filt <- rf_snp_filter(all.pairs, 2, 2, 0.07, probs = c(0.15, 1)) +#' p1.filt <- make_pairs_mappoly(input.seq = tpt.filt, input.twopt = all.pairs) +#' m1.filt <- rf_list_to_matrix(input.twopt = p1.filt) +#' plot(mat.full, main.text = "LG1") +#' plot(m1.filt, main.text = "LG1.filt") +#' +#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu} with updates by Gabriel Gesteira, \email{gdesiqu@ncsu.edu} +#' +#' @references +#' Mollinari, M., and Garcia, A. A. F. (2019) Linkage +#' analysis and haplotype phasing in experimental autopolyploid +#' populations with high ploidy level using hidden Markov +#' models, _G3: Genes, Genomes, Genetics_. +#' \doi{10.1534/g3.119.400378} +#' +#' @export rf_snp_filter +#' @importFrom ggplot2 ggplot geom_histogram aes scale_fill_manual xlab ggtitle +#' @importFrom graphics hist +rf_snp_filter <- function(input.twopt, + thresh.LOD.ph = 5, + thresh.LOD.rf = 5, + thresh.rf = 0.15, + probs = c(0.05, 1), + diag.markers = NULL, + mrk.order = NULL, + ncpus = 1L, + diagnostic.plot = TRUE, + breaks = 100) +{ + + input_classes <- c("mappoly.twopt", "mappoly.twopt2") + if (!inherits(input.twopt, input_classes)) { + stop(deparse(substitute(input.twopt)), paste0(" is not an object of class ", paste0(input_classes, collapse = " or "))) + } + probs <- range(probs) + ## Getting filtered rf matrix + rf_mat <- rf_list_to_matrix(input.twopt = input.twopt, thresh.LOD.ph = thresh.LOD.ph, + thresh.LOD.rf = thresh.LOD.rf, thresh.rf = thresh.rf, + ncpus = ncpus, verbose = FALSE) + M <- rf_mat$rec.mat + if(!is.null(mrk.order)) + M <- M[mrk.order, mrk.order] + if(!is.null(diag.markers)) + M[abs(col(M) - row(M)) > diag.markers] <- NA + x <- apply(M, 1, function(x) sum(!is.na(x))) + w <- hist(x, breaks = breaks, plot = FALSE) + th <- quantile(x, probs = probs) + rem <- c(which(x < th[1]), which(x > th[2])) + ids <- names(which(x >= th[1] & x <= th[2])) + value <- type <- NULL + if(diagnostic.plot){ + d <- rbind(data.frame(type = "original", value = x), + data.frame(type = "filtered", value = x[ids])) + p <- ggplot2::ggplot(d, ggplot2::aes(value)) + + ggplot2::geom_histogram(ggplot2::aes(fill = type), + alpha = 0.4, position = "identity", binwidth = diff(w$mids)[1]) + + ggplot2::scale_fill_manual(values = c("#00AFBB", "#E7B800")) + + ggplot2::ggtitle( paste0("Filtering probs: [", probs[1], " : ", probs[2], "] - Non NA values by row in rf matrix - b width: ", diff(w$mids)[1])) + + ggplot2::xlab(paste0("Non 'NA' values at LOD.ph = ", thresh.LOD.ph, ", LOD.rf = ", thresh.LOD.rf, ", and thresh.rf = ", thresh.rf)) + print(p) + } + ## Returning sequence object + ch_filt <- make_seq_mappoly(input.obj = get(input.twopt$data.name, pos = 1), arg = ids, data.name = input.twopt$data.name) + return(ch_filt) +} + +#' Edit sequence ordered by reference genome positions +#' comparing to another set order +#' +#' @param input.seq object of class mappoly.sequence with alternative order (not genomic order) +#' @param invert vector of marker names to be inverted +#' @param remove vector of marker names to be removed +#' +#' @author Cristiane Taniguti, \email{chtaniguti@tamu.edu} +#' +#' @examples +#' \donttest{ +#' dat <- filter_segregation(tetra.solcap, inter = FALSE) +#' seq_dat <- make_seq_mappoly(dat) +#' seq_chr <- make_seq_mappoly(seq_dat, arg = seq_dat$seq.mrk.names[which(seq_dat$chrom=="1")]) +#' +#' tpt <- est_pairwise_rf(seq_chr) +#' seq.filt <- rf_snp_filter(tpt, probs = c(0.05, 0.95)) +#' mat <- rf_list_to_matrix(tpt) +#' mat2 <- make_mat_mappoly(mat, seq.filt) +#' +#' seq_test_mds <- mds_mappoly(mat2) +#' seq_mds <- make_seq_mappoly(seq_test_mds) +#' edit_seq <- edit_order(input.seq = seq_mds) +#' } +#' +#' @return object of class \code{mappoly.edit.order}: a list containing +#' vector of marker names ordered according to editions (`edited_order`); +#' vector of removed markers names (`removed`); +#' vector of inverted markers names (`inverted`). +#' +#' @export +edit_order <- function(input.seq, invert= NULL, remove = NULL){ + + if (!inherits(input.seq, "mappoly.sequence")) { + stop(deparse(substitute(input.seq)), " is not an object of class 'mappoly.sequence'") + } + + get_weird <- data.frame(x = 1:length(input.seq$genome.pos), + y = input.seq$genome.pos) + + rownames(get_weird) <- input.seq$seq.mrk.names + get_weird <- get_weird[order(get_weird$y),] + plot(get_weird$x, get_weird$y, xlab="input sequence order", ylab = "genomic position (bp)") + + inverted <- removed <- vector() + if(!is.null(invert) | !is.null(remove)){ + if(!is.null(invert)){ + inverted <- c(inverted, as.vector(invert)) + repl <- get_weird[rev(match(as.vector(invert),rownames(get_weird))),] + get_weird[match(as.vector(invert),rownames(get_weird)),2] <- repl[,2] + rownames(get_weird)[match(as.vector(invert), rownames(get_weird))] <- rownames(repl) + } + if(!is.null(remove)){ + removed <- c(removed, as.vector(remove)) + get_weird <- get_weird[-match(remove, rownames(get_weird)),] + } + plot(get_weird$x, get_weird$y, xlab="input sequence order", ylab = "genomic position (bp)") + } else { + cat("Mark at least three points on the plot and press `Esc` to continue.") + if(interactive()){ + ANSWER <- "Y" + while(substr(ANSWER, 1, 1) == "y" | substr(ANSWER, 1, 1) == "yes" | substr(ANSWER, 1, 1) == "Y" | ANSWER == ""){ + plot(get_weird$x, get_weird$y, xlab="input sequence order", ylab = "genomic position (bp)") + mks.to.remove <- gatepoints::fhs(get_weird, mark = TRUE) + if(length(which(rownames(get_weird) %in% mks.to.remove)) > 0){ + ANSWER2 <- readline("Enter 'invert/remove' to proceed with the edition: ") + if(ANSWER2 == "invert"){ + inverted <- c(inverted, as.vector(mks.to.remove)) + repl <- get_weird[rev(match(as.vector(mks.to.remove),rownames(get_weird))),] + get_weird[match(as.vector(mks.to.remove),rownames(get_weird)),2] <- repl[,2] + rownames(get_weird)[match(as.vector(mks.to.remove), rownames(get_weird))] <- rownames(repl) + } else { + removed <- c(removed, as.vector(mks.to.remove)) + get_weird <- get_weird[-match(mks.to.remove, rownames(get_weird)),] + } + } + ANSWER <- readline("Enter 'Y/n' to proceed with interactive edition or quit: ") + } + plot(get_weird$x, get_weird$y, xlab="input sequence order", ylab = "genomic position (bp)") + } + } + + return(structure(list(edited_order = rownames(get_weird), + removed = removed, + inverted = inverted, + data.name = input.seq$data.name), class = "mappoly.edit.order")) +} + +#' Filter aneuploid chromosomes from progeny individuals +#' +#' @param input.data name of input object (class \code{mappoly.data}) +#' +#' @param aneuploid.info data.frame with ploidy information by chromosome (columns) for each individual +#' in progeny (rows). The chromosome and individuals names must match the ones in the file used as input +#' in mappoly. +#' +#' @param ploidy main ploidy +#' +#' @param rm_missing remove also genotype information from chromosomes with missing data (NA) in the aneuploid.info file +#' +#' @examples +#' aneuploid.info <- matrix(4, nrow=tetra.solcap$n.ind, ncol = 12) +#' set.seed(8080) +#' aneuploid.info[sample(1:length(aneuploid.info), round((4*length(aneuploid.info))/100),0)] <- 3 +#' aneuploid.info[sample(1:length(aneuploid.info), round((4*length(aneuploid.info))/100),0)] <- 5 +#' +#' colnames(aneuploid.info) <- paste0(1:12) +#' aneuploid.info <- cbind(inds = tetra.solcap$ind.names, aneuploid.info) +#' +#' filt.dat <- filter_aneuploid(input.data = tetra.solcap, +#' aneuploid.info = aneuploid.info, ploidy = 4) +#' +#' @author Cristiane Taniguti, \email{chtaniguti@tamu.edu} +#' +#' @return object of class \code{mappoly.data} +#' +#' @export +filter_aneuploid <- function(input.data, aneuploid.info, ploidy, rm_missing = TRUE){ + + if (!inherits(input.data, "mappoly.data")) { + stop(deparse(substitute(input.data)), " is not an object of class 'mappoly.data'") + } + + aneu.chroms <- colnames(aneuploid.info)[-1] + + keep.ind <- match(input.data$ind.names, aneuploid.info[,1]) + + if(length(keep.ind) != length(input.data$ind.names) | any(is.na(keep.ind))) + stop("The aneuploid information ploidy is missing at least one sample.") + + aneuploid.info.filt <- aneuploid.info[keep.ind,] + + idx.list <- apply(aneuploid.info.filt[,-1], 1, function(x) which(x != ploidy)) + names(idx.list) <- aneuploid.info.filt[,1] + + idx.list <- idx.list[-which(!sapply(idx.list, function(x) length(x) > 0))] + + if(rm_missing){ + idx.list.na <- apply(aneuploid.info.filt[,-1], 1, function(x) which(is.na(x))) + if(length(idx.list.na) > 0){ + names(idx.list.na) <- aneuploid.info.filt[,1] + idx.list.na <- idx.list.na[-which(!sapply(idx.list.na, function(x) length(x) > 0))] + idx.list <- c(idx.list, idx.list.na) + } + cat(round((length(unlist(idx.list))/(dim(aneuploid.info.filt)[1]*(dim(aneuploid.info.filt)[2]-1)))*100,2), "% of the chromosomes x individuals are aneuploids or has missing ploidy information\n") + } else + cat(round((length(unlist(idx.list))/(dim(aneuploid.info.filt)[1]*(dim(aneuploid.info.filt)[2]-1)))*100,2), "% of the chromosomes x individuals are aneuploids\n") + + for(i in 1:length(idx.list)){ + idx <- which(input.data$chrom %in% names(idx.list[[i]])) + input.data$geno.dose[idx,names(idx.list[i])] <- ploidy + 1 + } + + return(input.data) +} + diff --git a/man/filter_individuals.Rd b/man/filter_individuals.Rd index 1f1feae7..ef4495ec 100644 --- a/man/filter_individuals.Rd +++ b/man/filter_individuals.Rd @@ -8,6 +8,7 @@ filter_individuals( input.data, ind.to.remove = NULL, inter = TRUE, + type = c("Gmat", "PCA"), verbose = TRUE ) } @@ -20,6 +21,13 @@ selection} \item{inter}{if \code{TRUE}, expects user-input to proceed with filtering} +\item{type}{A character string specifying the procedure to be used for + detecting outlier offspring. Options include "Gmat", which + utilizes the genomic kinship matrix, and "PCA", which employs + principal component analysis on the dosage matrix. + + coefficient (or covariance) is to be computed. One of "pearson" (default), "kendall", or "spearman": can be abbreviated.} + \item{verbose}{if \code{TRUE} (default), shows the filtered out individuals} } \description{ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index d9f0e2eb..7e21fe7e 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1,106 +1,106 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand -// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#include - -using namespace Rcpp; - -#ifdef RCPP_USE_GLOBAL_ROSTREAM -Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); -Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); -#endif - -// vcf_get_probabilities -Rcpp::List vcf_get_probabilities(Rcpp::StringMatrix& mat, int pl_pos); -RcppExport SEXP _mappoly_vcf_get_probabilities(SEXP matSEXP, SEXP pl_posSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::StringMatrix& >::type mat(matSEXP); - Rcpp::traits::input_parameter< int >::type pl_pos(pl_posSEXP); - rcpp_result_gen = Rcpp::wrap(vcf_get_probabilities(mat, pl_pos)); - return rcpp_result_gen; -END_RCPP -} -// vcf_transform_dosage -Rcpp::NumericMatrix vcf_transform_dosage(Rcpp::StringMatrix& mat, int gt_pos); -RcppExport SEXP _mappoly_vcf_transform_dosage(SEXP matSEXP, SEXP gt_posSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::StringMatrix& >::type mat(matSEXP); - Rcpp::traits::input_parameter< int >::type gt_pos(gt_posSEXP); - rcpp_result_gen = Rcpp::wrap(vcf_transform_dosage(mat, gt_pos)); - return rcpp_result_gen; -END_RCPP -} -// vcf_get_ploidy -Rcpp::NumericMatrix vcf_get_ploidy(Rcpp::StringMatrix& mat, int gt_pos); -RcppExport SEXP _mappoly_vcf_get_ploidy(SEXP matSEXP, SEXP gt_posSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::StringMatrix& >::type mat(matSEXP); - Rcpp::traits::input_parameter< int >::type gt_pos(gt_posSEXP); - rcpp_result_gen = Rcpp::wrap(vcf_get_ploidy(mat, gt_pos)); - return rcpp_result_gen; -END_RCPP -} -// vcf_get_depth -Rcpp::NumericMatrix vcf_get_depth(Rcpp::StringMatrix& mat, int dp_pos); -RcppExport SEXP _mappoly_vcf_get_depth(SEXP matSEXP, SEXP dp_posSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< Rcpp::StringMatrix& >::type mat(matSEXP); - Rcpp::traits::input_parameter< int >::type dp_pos(dp_posSEXP); - rcpp_result_gen = Rcpp::wrap(vcf_get_depth(mat, dp_pos)); - return rcpp_result_gen; -END_RCPP -} - -RcppExport SEXP calc_genoprob(void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP calc_genoprob_prior(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP calc_genprob_haplo(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP calc_genprob_haplo_highprec(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP calc_genprob_single_parent(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_haplotype_map(void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_haplotype_map_highprec(void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_hmm_map_single_parent(void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_map_hmm(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP est_map_hmm_highprec(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP get_counts_single_parent_cpp(void *, void *, void *, void *, void *, void *); -RcppExport SEXP loglike_hmm(void *, void *, void *, void *, void *, void *); -RcppExport SEXP pairwise_rf_estimation_disc(void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP pairwise_rf_estimation_disc_rcpp(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP pairwise_rf_estimation_prob(void *, void *, void *, void *, void *, void *, void *, void *); -RcppExport SEXP poly_hmm_est_CPP(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); - -static const R_CallMethodDef CallEntries[] = { - {"_mappoly_vcf_get_probabilities", (DL_FUNC) &_mappoly_vcf_get_probabilities, 2}, - {"_mappoly_vcf_transform_dosage", (DL_FUNC) &_mappoly_vcf_transform_dosage, 2}, - {"_mappoly_vcf_get_ploidy", (DL_FUNC) &_mappoly_vcf_get_ploidy, 2}, - {"_mappoly_vcf_get_depth", (DL_FUNC) &_mappoly_vcf_get_depth, 2}, - {"calc_genoprob", (DL_FUNC) &calc_genoprob, 7}, - {"calc_genoprob_prior", (DL_FUNC) &calc_genoprob_prior, 12}, - {"calc_genprob_haplo", (DL_FUNC) &calc_genprob_haplo, 8}, - {"calc_genprob_haplo_highprec", (DL_FUNC) &calc_genprob_haplo_highprec, 8}, - {"calc_genprob_single_parent", (DL_FUNC) &calc_genprob_single_parent, 8}, - {"est_haplotype_map", (DL_FUNC) &est_haplotype_map, 9}, - {"est_haplotype_map_highprec", (DL_FUNC) &est_haplotype_map_highprec, 9}, - {"est_hmm_map_single_parent", (DL_FUNC) &est_hmm_map_single_parent, 9}, - {"est_map_hmm", (DL_FUNC) &est_map_hmm, 8}, - {"est_map_hmm_highprec", (DL_FUNC) &est_map_hmm_highprec, 8}, - {"get_counts_single_parent_cpp", (DL_FUNC) &get_counts_single_parent_cpp, 6}, - {"loglike_hmm", (DL_FUNC) &loglike_hmm, 6}, - {"pairwise_rf_estimation_disc", (DL_FUNC) &pairwise_rf_estimation_disc, 7}, - {"pairwise_rf_estimation_disc_rcpp", (DL_FUNC) &pairwise_rf_estimation_disc_rcpp, 13}, - {"pairwise_rf_estimation_prob", (DL_FUNC) &pairwise_rf_estimation_prob, 8}, - {"poly_hmm_est_CPP", (DL_FUNC) &poly_hmm_est_CPP, 13}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_mappoly(DllInfo *dll) { - R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + +// vcf_get_probabilities +Rcpp::List vcf_get_probabilities(Rcpp::StringMatrix& mat, int pl_pos); +RcppExport SEXP _mappoly_vcf_get_probabilities(SEXP matSEXP, SEXP pl_posSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::StringMatrix& >::type mat(matSEXP); + Rcpp::traits::input_parameter< int >::type pl_pos(pl_posSEXP); + rcpp_result_gen = Rcpp::wrap(vcf_get_probabilities(mat, pl_pos)); + return rcpp_result_gen; +END_RCPP +} +// vcf_transform_dosage +Rcpp::NumericMatrix vcf_transform_dosage(Rcpp::StringMatrix& mat, int gt_pos); +RcppExport SEXP _mappoly_vcf_transform_dosage(SEXP matSEXP, SEXP gt_posSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::StringMatrix& >::type mat(matSEXP); + Rcpp::traits::input_parameter< int >::type gt_pos(gt_posSEXP); + rcpp_result_gen = Rcpp::wrap(vcf_transform_dosage(mat, gt_pos)); + return rcpp_result_gen; +END_RCPP +} +// vcf_get_ploidy +Rcpp::NumericMatrix vcf_get_ploidy(Rcpp::StringMatrix& mat, int gt_pos); +RcppExport SEXP _mappoly_vcf_get_ploidy(SEXP matSEXP, SEXP gt_posSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::StringMatrix& >::type mat(matSEXP); + Rcpp::traits::input_parameter< int >::type gt_pos(gt_posSEXP); + rcpp_result_gen = Rcpp::wrap(vcf_get_ploidy(mat, gt_pos)); + return rcpp_result_gen; +END_RCPP +} +// vcf_get_depth +Rcpp::NumericMatrix vcf_get_depth(Rcpp::StringMatrix& mat, int dp_pos); +RcppExport SEXP _mappoly_vcf_get_depth(SEXP matSEXP, SEXP dp_posSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::StringMatrix& >::type mat(matSEXP); + Rcpp::traits::input_parameter< int >::type dp_pos(dp_posSEXP); + rcpp_result_gen = Rcpp::wrap(vcf_get_depth(mat, dp_pos)); + return rcpp_result_gen; +END_RCPP +} + +RcppExport SEXP calc_genoprob(void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP calc_genoprob_prior(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP calc_genprob_haplo(void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP calc_genprob_haplo_highprec(void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP calc_genprob_single_parent(void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP est_haplotype_map(void *, void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP est_haplotype_map_highprec(void *, void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP est_hmm_map_single_parent(void *, void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP est_map_hmm(void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP est_map_hmm_highprec(void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP get_counts_single_parent_cpp(void *, void *, void *, void *, void *, void *); +RcppExport SEXP loglike_hmm(void *, void *, void *, void *, void *, void *); +RcppExport SEXP pairwise_rf_estimation_disc(void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP pairwise_rf_estimation_disc_rcpp(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP pairwise_rf_estimation_prob(void *, void *, void *, void *, void *, void *, void *, void *); +RcppExport SEXP poly_hmm_est_CPP(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); + +static const R_CallMethodDef CallEntries[] = { + {"_mappoly_vcf_get_probabilities", (DL_FUNC) &_mappoly_vcf_get_probabilities, 2}, + {"_mappoly_vcf_transform_dosage", (DL_FUNC) &_mappoly_vcf_transform_dosage, 2}, + {"_mappoly_vcf_get_ploidy", (DL_FUNC) &_mappoly_vcf_get_ploidy, 2}, + {"_mappoly_vcf_get_depth", (DL_FUNC) &_mappoly_vcf_get_depth, 2}, + {"calc_genoprob", (DL_FUNC) &calc_genoprob, 7}, + {"calc_genoprob_prior", (DL_FUNC) &calc_genoprob_prior, 12}, + {"calc_genprob_haplo", (DL_FUNC) &calc_genprob_haplo, 8}, + {"calc_genprob_haplo_highprec", (DL_FUNC) &calc_genprob_haplo_highprec, 8}, + {"calc_genprob_single_parent", (DL_FUNC) &calc_genprob_single_parent, 8}, + {"est_haplotype_map", (DL_FUNC) &est_haplotype_map, 9}, + {"est_haplotype_map_highprec", (DL_FUNC) &est_haplotype_map_highprec, 9}, + {"est_hmm_map_single_parent", (DL_FUNC) &est_hmm_map_single_parent, 9}, + {"est_map_hmm", (DL_FUNC) &est_map_hmm, 8}, + {"est_map_hmm_highprec", (DL_FUNC) &est_map_hmm_highprec, 8}, + {"get_counts_single_parent_cpp", (DL_FUNC) &get_counts_single_parent_cpp, 6}, + {"loglike_hmm", (DL_FUNC) &loglike_hmm, 6}, + {"pairwise_rf_estimation_disc", (DL_FUNC) &pairwise_rf_estimation_disc, 7}, + {"pairwise_rf_estimation_disc_rcpp", (DL_FUNC) &pairwise_rf_estimation_disc_rcpp, 13}, + {"pairwise_rf_estimation_prob", (DL_FUNC) &pairwise_rf_estimation_prob, 8}, + {"poly_hmm_est_CPP", (DL_FUNC) &poly_hmm_est_CPP, 13}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_mappoly(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +}