diff --git a/DESCRIPTION b/DESCRIPTION index 941b3d3..58d8f6e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: PureCN Type: Package Title: Copy number calling and SNV classification using targeted short read sequencing -Version: 1.23.12 +Version: 1.23.13 Date: 2021-08-12 Authors@R: c(person("Markus", "Riester", role = c("aut", "cre"), diff --git a/NEWS b/NEWS index 00fd636..0183dd2 100755 --- a/NEWS +++ b/NEWS @@ -10,6 +10,9 @@ SIGNIFICANT USER-VISIBLE CHANGES sometimes exceeded the noise. o Added min.variants argument to runAbsoluteCN o Added PureCN version to runAbsoluteCN results object (ret$version) + o Do not attempt two-step segmentation in PSCBS when off-target noise is still + very small (< min.logr.sdev, default 0.15). This happens with very clean data + and can lead to over-segmentation due to other biases. BUGFIXES diff --git a/R/runAbsoluteCN.R b/R/runAbsoluteCN.R index 08f1d91..0b11a19 100644 --- a/R/runAbsoluteCN.R +++ b/R/runAbsoluteCN.R @@ -572,7 +572,7 @@ runAbsoluteCN <- function(normal.coverage.file = NULL, seg = segProvided, plot.cnv = plot.cnv, sampleid = sampleid, vcf = vcf.germline, tumor.id.in.vcf = tumor.id.in.vcf, normal.id.in.vcf = normal.id.in.vcf, max.segments = max.segments, chr.hash = chr.hash, - centromeres = centromeres), args.segmentation) + min.logr.sdev = min.logr.sdev, centromeres = centromeres), args.segmentation) vcf.germline <- NULL seg <- do.call(fun.segmentation, diff --git a/R/segmentationCBS.R b/R/segmentationCBS.R index 34282c0..2aa9613 100644 --- a/R/segmentationCBS.R +++ b/R/segmentationCBS.R @@ -27,6 +27,9 @@ #' @param normal.id.in.vcf Id of normal in in VCF. Currently not used. #' @param max.segments If not \code{NULL}, try a higher \code{undo.SD} #' parameter if number of segments exceeds the threshold. +#' @param min.logr.sdev Minimum log-ratio standard deviation used in the +#' model. Useful to make fitting more robust to outliers in very clean +#' data (currently not used in this segmentation function). 
#' @param prune.hclust.h Height in the \code{hclust} pruning step. Increasing #' this value will merge segments more aggressively. If NULL, try to find a #' sensible default. @@ -76,7 +79,8 @@ segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, sampleid, weight.flag.pvalue = 0.01, alpha = 0.005, undo.SD = NULL, vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, - max.segments = NULL, prune.hclust.h = NULL, prune.hclust.method = "ward.D", + max.segments = NULL, min.logr.sdev = 0.15, + prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL, additional.cmd.args = "", centromeres = NULL) { if (is.null(chr.hash)) chr.hash <- .getChrHash(seqlevels(tumor)) diff --git a/R/segmentationGATK4.R b/R/segmentationGATK4.R index 4228497..bc23004 100644 --- a/R/segmentationGATK4.R +++ b/R/segmentationGATK4.R @@ -18,6 +18,9 @@ #' @param tumor.id.in.vcf Id of tumor in case multiple samples are stored in #' VCF. #' @param normal.id.in.vcf Id of normal in in VCF. Currently not used. +#' @param min.logr.sdev Minimum log-ratio standard deviation used in the +#' model. Useful to make fitting more robust to outliers in very clean +#' data (currently not used in this segmentation function). #' @param prune.hclust.h Ignored in this function. #' @param prune.hclust.method Ignored in this function. #' @param changepoints.penality The \code{--number-of-changepoints-penalty-factor}. @@ -59,7 +62,7 @@ #' @export segmentationGATK4 segmentationGATK4 <- function(normal, tumor, log.ratio, seg, vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, - prune.hclust.h = NULL, prune.hclust.method = NULL, + min.logr.sdev = 0.15, prune.hclust.h = NULL, prune.hclust.method = NULL, changepoints.penality = NULL, additional.cmd.args = "", chr.hash = NULL, ...) 
{ diff --git a/R/segmentationHclust.R b/R/segmentationHclust.R index 6c5e868..7ad3194 100644 --- a/R/segmentationHclust.R +++ b/R/segmentationHclust.R @@ -17,6 +17,9 @@ #' @param tumor.id.in.vcf Id of tumor in case multiple samples are stored in #' VCF. #' @param normal.id.in.vcf Id of normal in in VCF. Currently not used. +#' @param min.logr.sdev Minimum log-ratio standard deviation used in the +#' model. Useful to make fitting more robust to outliers in very clean +#' data (currently not used in this segmentation function). #' @param prune.hclust.h Height in the \code{hclust} pruning step. Increasing #' this value will merge segments more aggressively. If NULL, try to find a #' sensible default. @@ -50,7 +53,7 @@ #' @export segmentationHclust segmentationHclust <- function(seg, vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, - prune.hclust.h = NULL, prune.hclust.method = "ward.D", + min.logr.sdev = 0.15, prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL, ...) { if (is.null(seg)) { .stopUserError("segmentationHclust requires an input segmentation.") diff --git a/R/segmentationPSCBS.R b/R/segmentationPSCBS.R index b084583..1b811d3 100644 --- a/R/segmentationPSCBS.R +++ b/R/segmentationPSCBS.R @@ -1,11 +1,11 @@ #' PSCBS segmentation -#' +#' #' Alternative segmentation function using the \code{PSCBS} package. This #' function is called via the \code{fun.segmentation} argument of #' \code{\link{runAbsoluteCN}}. The arguments are passed via #' \code{args.segmentation}. -#' -#' +#' +#' #' @param normal Coverage data for normal sample. Ignored in this function. #' @param tumor Coverage data for tumor sample. #' @param log.ratio Copy number log-ratios, one for each exon in coverage file. @@ -26,64 +26,67 @@ #' @param vcf Optional VCF object with germline allelic ratios. #' @param tumor.id.in.vcf Id of tumor in case multiple samples are stored in #' VCF. -#' @param normal.id.in.vcf Id of normal in in VCF. 
If \code{NULL}, +#' @param normal.id.in.vcf Id of normal in in VCF. If \code{NULL}, #' use unpaired PSCBS. -#' @param max.segments If not \code{NULL}, try a higher \code{undo.SD} +#' @param max.segments If not \code{NULL}, try a higher \code{undo.SD} #' parameter if number of segments exceeds the threshold. #' @param boost.on.target.max.size When off-target regions are noisy #' compared to on-target, try to find small segments of specified #' maximum size that might be missed to due the increased noise. #' Set to 0 to turn boosting off. +#' @param min.logr.sdev Minimum log-ratio standard deviation used in the model. +#' Useful to make fitting more robust to outliers in very clean data. Here, the +#' two-step segmentation is skipped unless off-target noise exceeds this value. #' @param prune.hclust.h Height in the \code{hclust} pruning step. Increasing -#' this value will merge segments more aggressively. If \code{NULL}, try to +#' this value will merge segments more aggressively. If \code{NULL}, try to #' find a sensible default. #' @param prune.hclust.method Cluster method used in the \code{hclust} pruning #' step. See documentation for the \code{hclust} function. #' @param chr.hash Mapping of non-numerical chromsome names to numerical names -#' (e.g. chr1 to 1, chr2 to 2, etc.). If \code{NULL}, assume chromsomes are +#' (e.g. chr1 to 1, chr2 to 2, etc.). If \code{NULL}, assume chromsomes are #' properly ordered. #' @param additional.cmd.args \code{character(1)}. Ignored. #' @param centromeres A \code{GRanges} with centromere positions. -#' If not \code{NULL}, add breakpoints at centromeres. -#' @param \dots Additional parameters passed to the +#' If not \code{NULL}, add breakpoints at centromeres. +#' @param \dots Additional parameters passed to the #' \code{segmentByNonPairedPSCBS} function. #' @return \code{data.frame} containing the segmentation. #' @author Markus Riester -#' @references Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. -#' (2004). 
Circular binary segmentation for the analysis of array-based DNA +#' @references Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. +#' (2004). Circular binary segmentation for the analysis of array-based DNA #' copy number data. Biostatistics 5: 557-572. #' -#' Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary -#' segmentation algorithm for the analysis of array CGH data. Bioinformatics +#' Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary +#' segmentation algorithm for the analysis of array CGH data. Bioinformatics #' 23: 657-63. #' -#' Olshen et al. (2011). Parent-specific copy number in paired tumor-normal +#' Olshen et al. (2011). Parent-specific copy number in paired tumor-normal #' studies using circular binary segmentation. Bioinformatics. #' @seealso \code{\link{runAbsoluteCN}} #' @examples -#' -#' normal.coverage.file <- system.file("extdata", "example_normal_tiny.txt", -#' package="PureCN") -#' tumor.coverage.file <- system.file("extdata", "example_tumor_tiny.txt", -#' package="PureCN") +#' +#' normal.coverage.file <- system.file("extdata", "example_normal_tiny.txt", +#' package = "PureCN") +#' tumor.coverage.file <- system.file("extdata", "example_tumor_tiny.txt", +#' package = "PureCN") #' vcf.file <- system.file("extdata", "example.vcf.gz", -#' package="PureCN") -#' +#' package = "PureCN") +#' #' # The max.candidate.solutions, max.ploidy and test.purity parameters are set to #' # non-default values to speed-up this example. This is not a good idea for real #' # samples. 
-#' ret <-runAbsoluteCN(normal.coverage.file=normal.coverage.file, -#' tumor.coverage.file=tumor.coverage.file, vcf.file=vcf.file, -#' sampleid="Sample1", genome="hg19", -#' fun.segmentation=segmentationPSCBS, max.ploidy=4, -#' test.purity=seq(0.3,0.7,by=0.05), max.candidate.solutions=1) -#' +#' ret <-runAbsoluteCN(normal.coverage.file = normal.coverage.file, +#' tumor.coverage.file = tumor.coverage.file, vcf.file = vcf.file, +#' sampleid = "Sample1", genome = "hg19", +#' fun.segmentation = segmentationPSCBS, max.ploidy = 4, +#' test.purity = seq(0.3, 0.7, by = 0.05), max.candidate.solutions = 1) +#' #' @export segmentationPSCBS -segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, - sampleid, weight.flag.pvalue = 0.01, alpha = 0.005, +segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, + sampleid, weight.flag.pvalue = 0.01, alpha = 0.005, undo.SD = NULL, flavor = "tcn&dh", tauA = 0.03, vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL, - boost.on.target.max.size = 30, + boost.on.target.max.size = 30, min.logr.sdev = 0.15, prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL, additional.cmd.args = "", centromeres = NULL, ...) 
{ @@ -92,13 +95,13 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, } if (is.null(chr.hash)) chr.hash <- .getChrHash(seqlevels(tumor)) - + use.weights <- FALSE - if (!is.null(tumor$weights) && length(unique(tumor$weights)) > 1 ) { + if (!is.null(tumor$weights) && length(unique(tumor$weights)) > 1) { flog.info("Interval weights found, will use weighted PSCBS.") use.weights <- TRUE } - input <- .PSCBSinput(tumor, log.ratio, vcf, tumor.id.in.vcf, + input <- .PSCBSinput(tumor, log.ratio, vcf, tumor.id.in.vcf, normal.id.in.vcf, chr.hash) knownSegmentsCentromeres <- .PSCBSgetKnownSegments(centromeres, chr.hash) @@ -111,39 +114,41 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, knownSegments <- knownSegmentsCentromeres if (any(!input$on.target) && boost.on.target.max.size > 0 && - .robustSd(input$CT[input$on.target]) * 1.5 < - .robustSd(input$CT[!input$on.target])) { + .robustSd(input$CT[input$on.target]) * 1.5 < + .robustSd(input$CT[!input$on.target]) && + .robustSd(input$CT[!input$on.target]) > min.logr.sdev + ) { flog.info("On-target much cleaner than off-target, finding on-target breakpoints first...") idxot <- input$on.target if (use.weights) { idxot <- input$on.target & input$weights >= median(input$weights, na.rm = TRUE) if (!is.null(input$mappability) && any(!is.na(input$mappability))) { idxot <- idxot & !is.na(input$mappability) & input$mappability > 0.9 - } + } } flog.info("Using %i high quality (out of %i) on-target intervals for initial breakpoint calculation.", - sum(idxot), sum(input$on.target)) - segPSCBSot <- PSCBS::segmentByNonPairedPSCBS(input[idxot,], tauA=tauA, - flavor=flavor, undoTCN=undo.SD, knownSegments=knownSegments, - min.width=3,alphaTCN=alpha/2, ...) + sum(idxot), sum(input$on.target)) + segPSCBSot <- PSCBS::segmentByNonPairedPSCBS(input[idxot, ], tauA = tauA, + flavor = flavor, undoTCN = undo.SD, knownSegments = knownSegments, + min.width = 3,alphaTCN = alpha / 2, ...) 
segot <- .PSCBSoutput2DNAcopy(segPSCBSot, sampleid) if (!is.null(vcf)) { - segot <- .pruneByHclust(segot, vcf, tumor.id.in.vcf, h=prune.hclust.h, - method=prune.hclust.method, chr.hash=chr.hash) + segot <- .pruneByHclust(segot, vcf, tumor.id.in.vcf, h = prune.hclust.h, + method = prune.hclust.method, chr.hash = chr.hash) } - segot <- segot[segot$num.mark > 3 & + segot <- segot[segot$num.mark > 3 & segot$num.mark <= boost.on.target.max.size, 2:4] colnames(segot) <- colnames(knownSegments)[1:3] knownSegments <- .PSCBSgetKnownSegments(centromeres, chr.hash, segot) - } - segPSCBS <- PSCBS::segmentByNonPairedPSCBS(input, tauA=tauA, - flavor=flavor, undoTCN=undo.SD, knownSegments=knownSegments, - min.width=3,alphaTCN=alpha, ...) + } + segPSCBS <- PSCBS::segmentByNonPairedPSCBS(input, tauA = tauA, + flavor = flavor, undoTCN = undo.SD, knownSegments = knownSegments, + min.width = 3,alphaTCN = alpha, ...) try(flog.debug("Kappa: %f", PSCBS::estimateKappa(segPSCBS)), silent = TRUE) seg <- .PSCBSoutput2DNAcopy(segPSCBS, sampleid) if (undo.SD <= 0 || is.null(max.segments) || nrow(seg) < max.segments) break flog.info("Found %i segments, exceeding max.segments threshold of %i.", - nrow(seg), max.segments) + nrow(seg), max.segments) undo.SD <- undo.SD * 1.5 try.again <- try.again + 1 } @@ -152,40 +157,40 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, segPSCBS <- NULL if (!is.null(vcf)) { - seg <- .pruneByHclust(seg, vcf, tumor.id.in.vcf, h=prune.hclust.h, - method=prune.hclust.method, chr.hash=chr.hash) + seg <- .pruneByHclust(seg, vcf, tumor.id.in.vcf, h = prune.hclust.h, + method = prune.hclust.method, chr.hash = chr.hash) } seg <- .addAverageWeights(seg, weight.flag.pvalue, tumor, chr.hash) seg <- .fixBreakpointsInBaits(tumor, log.ratio, seg, chr.hash) seg } -.PSCBSinput <- function(tumor, log.ratio, vcf, tumor.id.in.vcf, +.PSCBSinput <- function(tumor, log.ratio, vcf, tumor.id.in.vcf, normal.id.in.vcf, chr.hash) { if (is.null(tumor$weights)) 
tumor$weights <- 1 if (is.null(vcf)) { .stopUserError("segmentationPSCBS requires VCF file.") - } + } ov <- findOverlaps(vcf, tumor) - d.f <- cbind(as.data.frame(tumor[subjectHits(ov)]), - CT = 2 ^ (log.ratio+1)[subjectHits(ov)], - betaT = unlist(geno(vcf[queryHits(ov)])$FA[,tumor.id.in.vcf]), + d.f <- cbind(as.data.frame(tumor[subjectHits(ov)]), + CT = 2 ^ (log.ratio+1)[subjectHits(ov)], + betaT = unlist(geno(vcf[queryHits(ov)])$FA[, tumor.id.in.vcf]), betaN = NA, x = start(vcf[queryHits(ov)]), w = tumor$weights[subjectHits(ov)]) - + if (!is.null(normal.id.in.vcf)) { - d.f$betaN <- unlist(geno(vcf[queryHits(ov)])$FA[,normal.id.in.vcf]) + d.f$betaN <- unlist(geno(vcf[queryHits(ov)])$FA[, normal.id.in.vcf]) } - d.f.2 <- cbind(as.data.frame(tumor[-subjectHits(ov)]), - CT = 2 ^ (log.ratio+1)[-subjectHits(ov)], betaT=NA, betaN=NA, + d.f.2 <- cbind(as.data.frame(tumor[-subjectHits(ov)]), + CT = 2 ^ (log.ratio+1)[-subjectHits(ov)], betaT = NA, betaN = NA, x = start(tumor[-subjectHits(ov)]), w = tumor$weights[-subjectHits(ov)]) d.f <- rbind(d.f, d.f.2) colnames(d.f)[1] <- "chromosome" - d.f <- d.f[order(.strip.chr.name(d.f[,1], chr.hash), d.f$x),] + d.f <- d.f[order(.strip.chr.name(d.f[, 1], chr.hash), d.f$x), ] d.f$chromosome <- .strip.chr.name(d.f$chromosome, chr.hash) if (min(tumor$weights) == max(tumor$weights)) { @@ -199,21 +204,21 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, if (is.null(centromeres)) return(NULL) knownSegments <- data.frame(centromeres) colnames(knownSegments)[1] <- "chromosome" - knownSegments$length <- knownSegments$end-knownSegments$start+1 + knownSegments$length <- knownSegments$end - knownSegments$start + 1 knownSegments$chromosome <- .strip.chr.name(knownSegments$chromosome, chr.hash) if (!is.null(breakpoints)) { knownSegments <- rbind(knownSegments[, 1:3], breakpoints) - } + } PSCBS::gapsToSegments(knownSegments) } .PSCBSoutput2DNAcopy <- function(seg, sampleid) { - sx <- cbind(ID=sampleid, 
seg$output[!is.na(seg$output$tcnMean),]) - sx <- sx[,c("ID", "chromosome", "tcnStart", "tcnEnd", "tcnNbrOfLoci", + sx <- cbind(ID = sampleid, seg$output[!is.na(seg$output$tcnMean), ]) + sx <- sx[, c("ID", "chromosome", "tcnStart", "tcnEnd", "tcnNbrOfLoci", "tcnMean")] - colnames(sx) <- c("ID", "chrom", "loc.start", "loc.end", "num.mark", + colnames(sx) <- c("ID", "chrom", "loc.start", "loc.end", "num.mark", "seg.mean") - sx$seg.mean <- log2(sx$seg.mean/2) + sx$seg.mean <- log2(sx$seg.mean / 2) sx } diff --git a/man/segmentationCBS.Rd b/man/segmentationCBS.Rd index 44b83bf..3217989 100644 --- a/man/segmentationCBS.Rd +++ b/man/segmentationCBS.Rd @@ -18,6 +18,7 @@ segmentationCBS( tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL, + min.logr.sdev = 0.15, prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL, @@ -61,6 +62,10 @@ VCF.} \item{max.segments}{If not \code{NULL}, try a higher \code{undo.SD} parameter if number of segments exceeds the threshold.} +\item{min.logr.sdev}{Minimum log-ratio standard deviation used in the +model. Useful to make fitting more robust to outliers in very clean +data (currently not used in this segmentation function).} + \item{prune.hclust.h}{Height in the \code{hclust} pruning step. Increasing this value will merge segments more aggressively. If NULL, try to find a sensible default.} diff --git a/man/segmentationGATK4.Rd b/man/segmentationGATK4.Rd index c2f0ba0..8b879d2 100644 --- a/man/segmentationGATK4.Rd +++ b/man/segmentationGATK4.Rd @@ -12,6 +12,7 @@ segmentationGATK4( vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, + min.logr.sdev = 0.15, prune.hclust.h = NULL, prune.hclust.method = NULL, changepoints.penality = NULL, @@ -39,6 +40,10 @@ VCF.} \item{normal.id.in.vcf}{Id of normal in in VCF. Currently not used.} +\item{min.logr.sdev}{Minimum log-ratio standard deviation used in the +model. 
Useful to make fitting more robust to outliers in very clean +data (currently not used in this segmentation function).} + \item{prune.hclust.h}{Ignored in this function.} \item{prune.hclust.method}{Ignored in this function.} diff --git a/man/segmentationHclust.Rd b/man/segmentationHclust.Rd index 3af6858..1758459 100644 --- a/man/segmentationHclust.Rd +++ b/man/segmentationHclust.Rd @@ -9,6 +9,7 @@ segmentationHclust( vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, + min.logr.sdev = 0.15, prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL, @@ -28,6 +29,10 @@ VCF.} \item{normal.id.in.vcf}{Id of normal in in VCF. Currently not used.} +\item{min.logr.sdev}{Minimum log-ratio standard deviation used in the +model. Useful to make fitting more robust to outliers in very clean +data (currently not used in this segmentation function).} + \item{prune.hclust.h}{Height in the \code{hclust} pruning step. Increasing this value will merge segments more aggressively. If NULL, try to find a sensible default.} diff --git a/man/segmentationPSCBS.Rd b/man/segmentationPSCBS.Rd index 34761a7..400bbfc 100644 --- a/man/segmentationPSCBS.Rd +++ b/man/segmentationPSCBS.Rd @@ -21,6 +21,7 @@ segmentationPSCBS( normal.id.in.vcf = NULL, max.segments = NULL, boost.on.target.max.size = 30, + min.logr.sdev = 0.15, prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL, @@ -63,10 +64,10 @@ function.} \item{tumor.id.in.vcf}{Id of tumor in case multiple samples are stored in VCF.} -\item{normal.id.in.vcf}{Id of normal in in VCF. If \code{NULL}, +\item{normal.id.in.vcf}{Id of normal in in VCF. 
If \code{NULL}, use unpaired PSCBS.} -\item{max.segments}{If not \code{NULL}, try a higher \code{undo.SD} +\item{max.segments}{If not \code{NULL}, try a higher \code{undo.SD} parameter if number of segments exceeds the threshold.} \item{boost.on.target.max.size}{When off-target regions are noisy @@ -74,15 +75,19 @@ compared to on-target, try to find small segments of specified maximum size that might be missed to due the increased noise. Set to 0 to turn boosting off.} +\item{min.logr.sdev}{Minimum log-ratio standard deviation used in the model. +Useful to make fitting more robust to outliers in very clean data. Here, the +two-step segmentation is skipped unless off-target noise exceeds this value.} + \item{prune.hclust.h}{Height in the \code{hclust} pruning step. Increasing -this value will merge segments more aggressively. If \code{NULL}, try to +this value will merge segments more aggressively. If \code{NULL}, try to find a sensible default.} \item{prune.hclust.method}{Cluster method used in the \code{hclust} pruning step. See documentation for the \code{hclust} function.} \item{chr.hash}{Mapping of non-numerical chromsome names to numerical names -(e.g. chr1 to 1, chr2 to 2, etc.). If \code{NULL}, assume chromsomes are +(e.g. chr1 to 1, chr2 to 2, etc.). If \code{NULL}, assume chromsomes are properly ordered.} \item{additional.cmd.args}{\code{character(1)}. Ignored.} @@ -90,7 +95,7 @@ properly ordered.} \item{centromeres}{A \code{GRanges} with centromere positions. 
If not \code{NULL}, add breakpoints at centromeres.} -\item{\dots}{Additional parameters passed to the +\item{\dots}{Additional parameters passed to the \code{segmentByNonPairedPSCBS} function.} } \value{ @@ -104,33 +109,33 @@ function is called via the \code{fun.segmentation} argument of } \examples{ -normal.coverage.file <- system.file("extdata", "example_normal_tiny.txt", - package="PureCN") -tumor.coverage.file <- system.file("extdata", "example_tumor_tiny.txt", - package="PureCN") +normal.coverage.file <- system.file("extdata", "example_normal_tiny.txt", + package = "PureCN") +tumor.coverage.file <- system.file("extdata", "example_tumor_tiny.txt", + package = "PureCN") vcf.file <- system.file("extdata", "example.vcf.gz", - package="PureCN") + package = "PureCN") # The max.candidate.solutions, max.ploidy and test.purity parameters are set to # non-default values to speed-up this example. This is not a good idea for real # samples. - ret <-runAbsoluteCN(normal.coverage.file=normal.coverage.file, - tumor.coverage.file=tumor.coverage.file, vcf.file=vcf.file, - sampleid="Sample1", genome="hg19", - fun.segmentation=segmentationPSCBS, max.ploidy=4, - test.purity=seq(0.3,0.7,by=0.05), max.candidate.solutions=1) + ret <-runAbsoluteCN(normal.coverage.file = normal.coverage.file, + tumor.coverage.file = tumor.coverage.file, vcf.file = vcf.file, + sampleid = "Sample1", genome = "hg19", + fun.segmentation = segmentationPSCBS, max.ploidy = 4, + test.purity = seq(0.3, 0.7, by = 0.05), max.candidate.solutions = 1) } \references{ -Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. -(2004). Circular binary segmentation for the analysis of array-based DNA +Olshen, A. B., Venkatraman, E. S., Lucito, R., Wigler, M. +(2004). Circular binary segmentation for the analysis of array-based DNA copy number data. Biostatistics 5: 557-572. -Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary -segmentation algorithm for the analysis of array CGH data. 
Bioinformatics +Venkatraman, E. S., Olshen, A. B. (2007). A faster circular binary +segmentation algorithm for the analysis of array CGH data. Bioinformatics 23: 657-63. -Olshen et al. (2011). Parent-specific copy number in paired tumor-normal +Olshen et al. (2011). Parent-specific copy number in paired tumor-normal studies using circular binary segmentation. Bioinformatics. } \seealso{