diff --git a/DESCRIPTION b/DESCRIPTION index 3f6f034..e267451 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: PureCN Type: Package Title: Copy number calling and SNV classification using targeted short read sequencing -Version: 1.23.17 +Version: 1.23.18 Date: 2021-08-20 Authors@R: c(person("Markus", "Riester", role = c("aut", "cre"), diff --git a/R/callMutationBurden.R b/R/callMutationBurden.R index b3f4ed5..a25528a 100644 --- a/R/callMutationBurden.R +++ b/R/callMutationBurden.R @@ -1,12 +1,12 @@ #' Call mutation burden -#' +#' #' This function provides detailed mutation burden information. -#' -#' +#' +#' #' @param res Return object of the \code{\link{runAbsoluteCN}} function. -#' @param id Candidate solution to extract mutation burden from. +#' @param id Candidate solution to extract mutation burden from. #' \code{id=1} will use the maximum likelihood solution. -#' @param remove.flagged Remove variants flagged by +#' @param remove.flagged Remove variants flagged by #' \code{\link{predictSomatic}}. #' @param min.prior.somatic Exclude variants with somatic prior #' probability lower than this cutoff. @@ -23,7 +23,7 @@ #' should be counted (\code{TRUE}) or not (\code{FALSE}). Default #' is to keep only single nucleotide variants. #' @param callable \code{GRanges} object with callable genomic regions, -#' for example obtained by \sQuote{GATK CallableLoci} BED file, imported +#' for example obtained by \sQuote{GATK CallableLoci} BED file, imported #' with \code{rtracklayer}. #' @param exclude \code{GRanges} object with genomic regions that #' should be excluded from the \code{callable} regions, for example @@ -33,26 +33,27 @@ #' @author Markus Riester #' @seealso \code{\link{runAbsoluteCN}} \code{\link{predictSomatic}} #' @examples -#' +#' #' data(purecn.example.output) #' callMutationBurden(purecn.example.output) #' #' # To calculate exact mutations per megabase, we can provide a BED #' # file containing all callable regions -#' callableBed <- import(system.file("extdata", "example_callable.bed.gz", +#' callableBed <- import(system.file("extdata", "example_callable.bed.gz", #' package = "PureCN")) #' -#' # We can exclude some regions for mutation burden calculation, +#' # We can exclude some regions for mutation burden calculation, #' # for example intronic regions. -#' exclude <- GRanges(seqnames="chr1", IRanges(start=1, -#' end=max(end(callableBed)))) -#' +#' exclude <- GRanges(seqnames = "chr1", IRanges(start = 1, +#' end = max(end(callableBed)))) +#' #' # We can also exclude specific mutations by filtering the input VCF #' myVcfFilter <- function(vcf) seqnames(vcf)!="chr2" #' -#' callsCallable <- callMutationBurden(purecn.example.output, -#' callable=callableBed, exclude=exclude, fun.countMutation=myVcfFilter) -#' +#' callsCallable <- callMutationBurden(purecn.example.output, +#' callable = callableBed, exclude = exclude, +#' fun.countMutation = myVcfFilter) +#' #' @export callMutationBurden #' @importFrom stats binom.test callMutationBurden <- function(res, id = 1, remove.flagged = TRUE, @@ -68,7 +69,7 @@ callMutationBurden <- function(res, id = 1, remove.flagged = TRUE, } p <- GRanges(predictSomatic(res, id)) - + callableBases <- NA callableBasesOntarget <- NA callableBasesFlanking <- NA @@ -77,11 +78,11 @@ callMutationBurden <- function(res, id = 1, remove.flagged = TRUE, if (!is.null(callable)) { if (!is(callable, "GRanges")) { .stopUserError("callable not a GRanges object.") - } + } if (!is.null(exclude)) { if (!is(exclude, "GRanges")) { .stopUserError("exclude not a GRanges object.") - } + } callable <- setdiff(callable, exclude) } @@ -100,62 +101,62 @@ callMutationBurden <- function(res, id = 1, remove.flagged = TRUE, callableBasesOntarget <- sum(as.numeric(width(reduce(callableOntarget)))) callableBasesFlanking <- sum(as.numeric(width(reduce(callableFlanking)))) p <- p[overlapsAny(p, callable)] - } - - # filter mutations, for example if the user wants to + } + + # filter mutations, for example if the user wants to # calculate missense burden if (!is.null(fun.countMutation)) { if (!is(fun.countMutation, "function")) { - .stopUserError("fun.countMutation not a function.") + .stopUserError("fun.countMutation not a function.") } vcf <- res$input$vcf vcf <- vcf[which(fun.countMutation(vcf))] p <- p[overlapsAny(p, vcf, type = "equal")] - } - - p <- p[p$prior.somatic >= min.prior.somatic & + } + + p <- p[p$prior.somatic >= min.prior.somatic & p$prior.somatic <= max.prior.somatic] - + if (remove.flagged) p <- p[!p$FLAGGED] - + ret <- data.frame( - somatic.ontarget=sum(p$ML.SOMATIC & p$on.target==1 & + somatic.ontarget = sum(p$ML.SOMATIC & p$on.target == 1 & p$CELLFRACTION>min.cellfraction), - somatic.all=sum(p$ML.SOMATIC & p$CELLFRACTION>min.cellfraction), - private.germline.ontarget=sum(!p$ML.SOMATIC & p$on.target==1), - private.germline.all=sum(!p$ML.SOMATIC), - callable.bases.ontarget=callableBasesOntarget, - callable.bases.flanking=callableBasesFlanking, - callable.bases.all=callableBases + somatic.all = sum(p$ML.SOMATIC & p$CELLFRACTION>min.cellfraction), + private.germline.ontarget = sum(!p$ML.SOMATIC & p$on.target == 1), + private.germline.all = sum(!p$ML.SOMATIC), + callable.bases.ontarget = callableBasesOntarget, + callable.bases.flanking = callableBasesFlanking, + callable.bases.all = callableBases ) - + if (!is.na(ret$callable.bases.ontarget) && ret$callable.bases.ontarget > 0) { - delta <- ret$callable.bases.ontarget/1e+6 - ci <- binom.test(x = ret$somatic.ontarget, + delta <- ret$callable.bases.ontarget / 1e+6 + ci <- binom.test(x = ret$somatic.ontarget, n = ret$callable.bases.ontarget)$conf.int ret <- cbind(ret, data.frame( - somatic.rate.ontarget = ret$somatic.ontarget/delta, + somatic.rate.ontarget = ret$somatic.ontarget / delta, somatic.rate.ontarget.95.lower = ci[1] * 1e+6, somatic.rate.ontarget.95.upper = ci[2] * 1e+6 )) # if multiple CI methods were provided, then ret contains multiple rows # now - ci <- binom.test(x = ret$private.germline.ontarget, + ci <- binom.test(x = ret$private.germline.ontarget, n = ret$callable.bases.ontarget)$conf.int ret <- cbind(ret, data.frame( - private.germline.rate.ontarget = ret$private.germline.ontarget/delta, + private.germline.rate.ontarget = ret$private.germline.ontarget / delta, private.germline.rate.ontarget.95.lower = ci[1] * 1e+6, private.germline.rate.ontarget.95.upper = ci[2] * 1e+6 )) - } - + } + ret } .padGranges <- function(target.granges, interval.padding) { target.granges.padding <- target.granges - start(target.granges.padding) <- start(target.granges.padding)-interval.padding - end(target.granges.padding) <- end(target.granges.padding)+interval.padding + start(target.granges.padding) <- start(target.granges.padding) - interval.padding + end(target.granges.padding) <- end(target.granges.padding) + interval.padding return(target.granges.padding) } @@ -164,6 +165,6 @@ callMutationBurden <- function(res, id = 1, remove.flagged = TRUE, flog.info("Estimating callable regions.") if (is(rds$input$log.ratio, "GRanges")) { callable <- reduce(rds$input$log.ratio[rds$input$log.ratio$on.target]) - } + } return(callable) -} +} diff --git a/R/filterIntervals.R b/R/filterIntervals.R index b785643..9e65f50 100644 --- a/R/filterIntervals.R +++ b/R/filterIntervals.R @@ -82,8 +82,7 @@ filterIntervals <- function(normal, tumor, log.ratio, seg.file, min.targeted.base, min.coverage) if (!is.null(normalDB)) { - min.coverage <- 0 - normal$average.coverage[is.na(normal$average.coverage)] <- min.coverage + min.coverage <- min.coverage / 10000 flog.info("normalDB provided. Setting minimum coverage for segmentation to %.4fX.", min.coverage) } else { flog.warn("No normalDB provided. Provide one for better results.") diff --git a/man/callMutationBurden.Rd b/man/callMutationBurden.Rd index 377422c..46befed 100644 --- a/man/callMutationBurden.Rd +++ b/man/callMutationBurden.Rd @@ -19,10 +19,10 @@ callMutationBurden( \arguments{ \item{res}{Return object of the \code{\link{runAbsoluteCN}} function.} -\item{id}{Candidate solution to extract mutation burden from. +\item{id}{Candidate solution to extract mutation burden from. \code{id=1} will use the maximum likelihood solution.} -\item{remove.flagged}{Remove variants flagged by +\item{remove.flagged}{Remove variants flagged by \code{\link{predictSomatic}}.} \item{min.prior.somatic}{Exclude variants with somatic prior @@ -44,7 +44,7 @@ should be counted (\code{TRUE}) or not (\code{FALSE}). Default is to keep only single nucleotide variants.} \item{callable}{\code{GRanges} object with callable genomic regions, -for example obtained by \sQuote{GATK CallableLoci} BED file, imported +for example obtained by \sQuote{GATK CallableLoci} BED file, imported with \code{rtracklayer}.} \item{exclude}{\code{GRanges} object with genomic regions that @@ -65,19 +65,20 @@ callMutationBurden(purecn.example.output) # To calculate exact mutations per megabase, we can provide a BED # file containing all callable regions -callableBed <- import(system.file("extdata", "example_callable.bed.gz", +callableBed <- import(system.file("extdata", "example_callable.bed.gz", package = "PureCN")) -# We can exclude some regions for mutation burden calculation, +# We can exclude some regions for mutation burden calculation, # for example intronic regions. -exclude <- GRanges(seqnames="chr1", IRanges(start=1, - end=max(end(callableBed)))) +exclude <- GRanges(seqnames = "chr1", IRanges(start = 1, + end = max(end(callableBed)))) # We can also exclude specific mutations by filtering the input VCF myVcfFilter <- function(vcf) seqnames(vcf)!="chr2" -callsCallable <- callMutationBurden(purecn.example.output, - callable=callableBed, exclude=exclude, fun.countMutation=myVcfFilter) +callsCallable <- callMutationBurden(purecn.example.output, + callable = callableBed, exclude = exclude, + fun.countMutation = myVcfFilter) } \seealso{