Removed defunct functions and arguments.

ddrichel · Jul 5, 2020 · b5957a1 · b5957a1
1 parent 55f1843
commit b5957a1
Show file tree

Hide file tree

Showing 18 changed files with 78 additions and 260 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,7 +3,6 @@
 export(annotateTargets)
 export(bootstrapResults)
 export(calculateBamCoverageByInterval)
-export(calculateGCContentByInterval)
 export(calculateIntervalWeights)
 export(calculateLogRatio)
 export(calculateMappingBiasGatk4)
@@ -19,9 +18,7 @@ export(callMutationBurden)
 export(correctCoverageBias)
 export(createCurationFile)
 export(createNormalDatabase)
-export(createTargetWeights)
 export(filterIntervals)
-export(filterTargets)
 export(filterVcfBasic)
 export(filterVcfMuTect)
 export(filterVcfMuTect2)

diff --git a/NEWS b/NEWS
@@ -5,7 +5,6 @@ NEW FEATURES
 
     o Support for GATK4 GenomicsDB import for mapping bias calculation
 
-
 SIGNIFICANT USER-VISIBLE CHANGES
 
     o We now check if POP_AF or POPAF is -log10 scaled as new Mutect2 versions
@@ -16,6 +15,9 @@ SIGNIFICANT USER-VISIBLE CHANGES
     o Updated Mutect2 failure flags: "strand_bias", "slippage", "weak_evidence",
         "orientation", "haplotype"
     o Removed defunct normal.panel.vcf.file from setMappingBiasVcf
+    o Removed defunct interval.weight.file from segmentationPSCBS,
+      segmentationCBS and processMultipleSamples
+    o Made calculateIntervalWeights defunct
 
 BUGFIXES
 

diff --git a/R/calculateIntervalWeights.R b/R/calculateIntervalWeights.R
@@ -1,122 +1,11 @@
 #' Calculate interval weights
 #' 
-#' Creates an interval weight file useful for segmentation. Requires a set of 
-#' coverage files from normal samples. Interval weights will be
-#' set proportional to the inverse of coverage standard deviation across all
-#' normals. Intervals with high variance in coverage in the pool of normals are
-#' thus down-weighted.
-#' 
 #' This function is now automatically called by \code{\link{createNormalDatabase}}
-#' and is thus deprecated.
+#' and is thus defunct.
 #' 
-#' @param normalDB Database of normal samples, created with
-#' \code{\link{createNormalDatabase}}.
-#' @param interval.weight.file Output filename.
-#' @param top.quantile Cap weight at the specified quantile. Intervals
-#' with standard deviation smaller than this value won't have a higher weight
-#' than intervals at this quantile.
-#' @param plot Diagnostics plot, useful to tune parameters.
-#' @param normal.coverage.files Deprecated.
-#' @return A normalDB object with following slots added
-#' \item{sd$log.ratios}{\code{GRanges} with all log.ratios.}
-#' \item{sd$weights}{\code{GRanges} with interval weights.}
 #' @author Markus Riester
 #' 
 #' @export calculateIntervalWeights
-calculateIntervalWeights <- function(normalDB,
-interval.weight.file = NULL, top.quantile = 0.7, plot = FALSE, 
-normal.coverage.files = NULL) {
-    .Deprecated("createNormalDatabase")
-
-    # TODO, defunct in 1.18
-    old_method <- FALSE
-    if (!is.null(normal.coverage.files) && missing(normalDB)) {
-        flog.warn("normal.coverage.files is deprecated, provide normalDB instead.")
-        old_method = TRUE
-    } else if (class(normalDB) == "character" && all(sapply(normalDB, file.exists)))  {
-        flog.warn("Providing normal coverage files is deprecated. Provide a normalDB instead.")
-        normal.coverage.files <- normalDB
-        old_method = TRUE
-    } else {
-        normal.coverage.files <- normalDB[["normal.coverage.files"]]
-    }    
-    flog.info("Loading coverage data...")
-    normal.coverage <- lapply(normal.coverage.files,  readCoverageFile)
-    .calculateIntervalWeights(normalDB, normal.coverage, interval.weight.file, top.quantile,
-        plot, old_method) 
+calculateIntervalWeights <- function() {
+    .Defunct("createNormalDatabase")
 }    
-
-.calculateIntervalWeights <- function(normalDB, normal.coverage, 
-interval.weight.file = NULL, top.quantile = 0.7, plot = FALSE, 
-old_method = FALSE) {
-
-    tumor.coverage <- list(poolCoverage(normal.coverage, 
-        w = rep(1, length(normal.coverage)) / length(normal.coverage)))
-
-    if (old_method) {
-        lrs <- lapply(tumor.coverage, function(tc) sapply(normal.coverage, 
-                function(nc) calculateLogRatio(nc, tc)))
-    } else {
-        lrs <- lapply(normal.coverage, function(x) calculateTangentNormal(x, normalDB)$log.ratio)
-    }
-
-    lrs <- do.call(cbind, lrs)
-
-    lrs[is.infinite(lrs)] <- NA
-
-    intervals <- normal.coverage[[1]]
-    mcols(intervals) <- NULL
-
-    lrs.sd <- apply(lrs, 1, sd, na.rm = TRUE)
-    lrs.cnt.na <- apply(lrs, 1, function(x) sum(is.na(x)))
-    # get the top.quantile % of sd by chromosome and use this to normalize weight=1
-    chrom <-  as.character(seqnames(intervals))
-    sdCutoffByChr <- sapply(split(lrs.sd, chrom), quantile, probs = top.quantile, 
-        names = FALSE, na.rm = TRUE)[chrom]
-
-    zz <- sdCutoffByChr / lrs.sd
-    zz[zz > 1] <- 1
-    idx <- is.na(zz) | lrs.cnt.na > ncol(lrs) / 3
-    zz[idx] <- min(zz, na.rm = TRUE)
-
-    ret <- list(
-                log.ratios = GRanges(intervals,,, DataFrame(lrs)),
-                weights = GRanges(intervals,,, DataFrame(weights = zz)))
-
-    if (!is.null(interval.weight.file)) {
-        ret_output <- data.frame(
-            Target = as.character(ret$weights), 
-            weights = ret$weights$weights)
-
-        fwrite(ret_output, file = interval.weight.file, row.names = FALSE, 
-                    quote = FALSE, sep = "\t")
-    }
-    if (plot) .plotIntervalWeights(lrs.sd, width(tumor.coverage[[1]]), 
-        tumor.coverage[[1]]$on.target)
-    if (old_method) return(NULL)    
-    normalDB$sd <- ret
-    normalDB
-}
-
-#' Calculate target weights
-#' 
-#' This function is defunct, use \code{\link{calculateIntervalWeights}}
-#' instead.
-#'
-#' @author Markus Riester
-#'
-#' @export createTargetWeights
-createTargetWeights <- function() {
-    .Defunct("calculateIntervalWeights")
-}
-
-.plotIntervalWeights <- function(lrs.sd, width, on.target) {
-    par(mfrow = c(1, 2))
-    plot(width[on.target], lrs.sd[on.target], ylim = c(0,2),
-        xlab = "Interval Width", ylab = "log2 ratio sd.", main = "On-Target")
-    if (sum(!on.target)) {
-        plot(width[!on.target], lrs.sd[!on.target], col = "red", 
-             ylim = c(0, 2), xlab = "Interval Width", ylab = "log2 ratio sd.",
-             main = "Off-Target")
-    }
-}
diff --git a/R/createNormalDatabase.R b/R/createNormalDatabase.R
@@ -352,3 +352,66 @@ min.coverage, max.missing) {
 
     intervals.used
 }
+
+.calculateIntervalWeights <- function(normalDB, normal.coverage, 
+interval.weight.file = NULL, top.quantile = 0.7, plot = FALSE, 
+old_method = FALSE) {
+
+    tumor.coverage <- list(poolCoverage(normal.coverage, 
+        w = rep(1, length(normal.coverage)) / length(normal.coverage)))
+
+    if (old_method) {
+        lrs <- lapply(tumor.coverage, function(tc) sapply(normal.coverage, 
+                function(nc) calculateLogRatio(nc, tc)))
+    } else {
+        lrs <- lapply(normal.coverage, function(x) calculateTangentNormal(x, normalDB)$log.ratio)
+    }
+
+    lrs <- do.call(cbind, lrs)
+
+    lrs[is.infinite(lrs)] <- NA
+
+    intervals <- normal.coverage[[1]]
+    mcols(intervals) <- NULL
+
+    lrs.sd <- apply(lrs, 1, sd, na.rm = TRUE)
+    lrs.cnt.na <- apply(lrs, 1, function(x) sum(is.na(x)))
+    # get the top.quantile % of sd by chromosome and use this to normalize weight=1
+    chrom <-  as.character(seqnames(intervals))
+    sdCutoffByChr <- sapply(split(lrs.sd, chrom), quantile, probs = top.quantile, 
+        names = FALSE, na.rm = TRUE)[chrom]
+
+    zz <- sdCutoffByChr / lrs.sd
+    zz[zz > 1] <- 1
+    idx <- is.na(zz) | lrs.cnt.na > ncol(lrs) / 3
+    zz[idx] <- min(zz, na.rm = TRUE)
+
+    ret <- list(
+                log.ratios = GRanges(intervals,,, DataFrame(lrs)),
+                weights = GRanges(intervals,,, DataFrame(weights = zz)))
+
+    if (!is.null(interval.weight.file)) {
+        ret_output <- data.frame(
+            Target = as.character(ret$weights), 
+            weights = ret$weights$weights)
+
+        fwrite(ret_output, file = interval.weight.file, row.names = FALSE, 
+                    quote = FALSE, sep = "\t")
+    }
+    if (plot) .plotIntervalWeights(lrs.sd, width(tumor.coverage[[1]]), 
+        tumor.coverage[[1]]$on.target)
+    if (old_method) return(NULL)    
+    normalDB$sd <- ret
+    normalDB
+}
+
+.plotIntervalWeights <- function(lrs.sd, width, on.target) {
+    par(mfrow = c(1, 2))
+    plot(width[on.target], lrs.sd[on.target], ylim = c(0,2),
+        xlab = "Interval Width", ylab = "log2 ratio sd.", main = "On-Target")
+    if (sum(!on.target)) {
+        plot(width[!on.target], lrs.sd[!on.target], col = "red", 
+             ylim = c(0, 2), xlab = "Interval Width", ylab = "log2 ratio sd.",
+             main = "Off-Target")
+    }
+}
diff --git a/R/filterIntervals.R b/R/filterIntervals.R
@@ -86,15 +86,6 @@ filterIntervals <- function(normal, tumor, log.ratio, seg.file,
     return(intervalsUsed)
 }
 
-#' Remove low quality targets
-#' 
-#' This function is defunct and was renamed to 
-#' \code{\link{filterIntervals}}.
-#' @export filterTargets
-filterTargets <- function() {
-    .Defunct("filterIntervals")
-}  
-
 .checkNormalDB <- function(tumor, normalDB) {
     if (!is(normalDB, "list")) {
         .stopUserError("normalDB not a valid normalDB object. ",

diff --git a/R/preprocessIntervals.R b/R/preprocessIntervals.R
@@ -140,15 +140,6 @@ preprocessIntervals <- function(interval.file, reference.file,
     invisible(interval.gr)
 }
 
-#' Calculates GC content by interval
-#'
-#' This function was renamed to \code{\link{preprocessIntervals}}.
-#'
-#' @export calculateGCContentByInterval
-calculateGCContentByInterval <- function() {
-    .Defunct("preprocessIntervals")
-}    
-
 # this function removes short chromosomes that have no probes (mainly a
 # general way to remove chrM)
 .dropShortUntargetedSeqLevels <- function(offRegions, interval.gr, minSize) {

diff --git a/R/processMultipleSamples.R b/R/processMultipleSamples.R
@@ -12,8 +12,6 @@
 #' @param genome Genome version, for example hg19. Needed to get centromere
 #' positions.
 #' @param plot.cnv Segmentation plots.
-#' @param interval.weight.file Deprecated. For \code{normalDB} objects generated
-#' with PureCN versions older than 1.16, re-run \code{\link{createNormalDatabase}}.
 #' @param min.interval.weight Can be used to ignore intervals with low weights.
 #' @param w Weight of samples. Can be used to downweight poor quality samples.
 #' If \code{NULL}, sets to inverse of median on-target duplication rate if
@@ -57,7 +55,7 @@
 #' @export processMultipleSamples
 processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB, 
     num.eigen = 20, genome, plot.cnv = TRUE, w = NULL,
-    interval.weight.file = NULL, min.interval.weight = 1/3,
+    min.interval.weight = 1/3,
     max.segments = NULL, chr.hash = NULL, centromeres = NULL, ...) {
 
     if (!requireNamespace("copynumber", quietly = TRUE)) {
@@ -69,11 +67,6 @@ processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB,
 
     interval.weights <- NULL
     intervalsUsed <- rep(TRUE, length(tumors[[1]]))
-    # TODO defunct in 1.18
-    if (!is.null(interval.weight.file) && 
-        !is.null(min.interval.weight)) {
-        normalDB <- .add_weights_to_normaldb(interval.weight.file, normalDB)
-    }    
     if (!is.null(normalDB$sd$weights) && !is.null(min.interval.weight)) {
        interval.weights <- normalDB$sd$weights$weights
        if (length(interval.weights) != length(tumors[[1]])) {
@@ -134,18 +127,3 @@ processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB,
     colnames(m) <- c("ID", "chrom", "loc.start", "loc.end", "num.mark", "seg.mean")
     data.frame(m)
 }
-
-.add_weights_to_normaldb <- function(interval.weight.file, normalDB = NULL) {
-    flog.warn("interval.weight.file is deprecated.")
-    if (!is.null(normalDB$sd$weights)) return(normalDB)
-
-    interval.weights <- read.delim(interval.weight.file, as.is = TRUE)
-    if (!is.null(normalDB)) {
-        interval.weights <- interval.weights[match(normalDB$intervals, 
-            interval.weights[,1]),]
-    }    
-    weights <- GRanges(interval.weights[,1],,, DataFrame(weights = interval.weights[,2]))
-    if (is.null(normalDB)) normalDB <- list()
-    normalDB$sd$weights <- weights
-    normalDB
-}            
diff --git a/R/segmentationCBS.R b/R/segmentationCBS.R
@@ -15,7 +15,6 @@
 #' ignores this user provided segmentation.
 #' @param plot.cnv Segmentation plots.
 #' @param sampleid Sample id, used in output files.
-#' @param interval.weight.file Deprecated.
 #' @param weight.flag.pvalue Flag values with one-sided p-value smaller than
 #' this cutoff.
 #' @param alpha Alpha value for CBS, see documentation for the \code{segment}
@@ -72,18 +71,13 @@
 #' @export segmentationCBS
 #' @importFrom stats t.test hclust cutree dist
 segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, 
-    sampleid, interval.weight.file = NULL, weight.flag.pvalue = 0.01, alpha = 0.005, 
+    sampleid, weight.flag.pvalue = 0.01, alpha = 0.005, 
     undo.SD = NULL, vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL,
     max.segments = NULL, prune.hclust.h = NULL, prune.hclust.method = "ward.D",
     chr.hash = NULL, centromeres = NULL) {
 
     if (is.null(chr.hash)) chr.hash <- .getChrHash(seqlevels(tumor))
 
-    # TODO defunct in 1.18
-    if (!is.null(interval.weight.file)) {
-        normalDB <- .add_weights_to_normaldb(interval.weight.file)
-        tumor$weights <- subsetByOverlaps(normalDB$sd$weights, tumor)$weights
-    }
     if (!is.null(tumor$weights) && length(unique(tumor$weights)) > 1 ) {
         flog.info("Interval weights found, will use weighted CBS.")
     }

diff --git a/R/segmentationPSCBS.R b/R/segmentationPSCBS.R
@@ -15,8 +15,6 @@
 #' ignores this user provided segmentation.
 #' @param plot.cnv Segmentation plots.
 #' @param sampleid Sample id, used in output files.
-#' @param interval.weight.file Can be used to assign weights to intervals. 
-#' Currently requires patched version of PSCBS. Deprecated.
 #' @param weight.flag.pvalue Flag values with one-sided p-value smaller than
 #' this cutoff.
 #' @param alpha Alpha value for CBS, see documentation for the \code{segment}
@@ -77,7 +75,7 @@
 #' 
 #' @export segmentationPSCBS
 segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv, 
-    sampleid, interval.weight.file = NULL, weight.flag.pvalue = 0.01, alpha = 0.005, 
+    sampleid, weight.flag.pvalue = 0.01, alpha = 0.005, 
     undo.SD = NULL, flavor = "tcn&dh", tauA = 0.03, vcf = NULL,
     tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL,
     prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL,
@@ -89,11 +87,6 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
 
     if (is.null(chr.hash)) chr.hash <- .getChrHash(seqlevels(tumor))
 
-    # TODO defunct in 1.18
-    if (!is.null(interval.weight.file)) {
-        normalDB <- .add_weights_to_normaldb(interval.weight.file)
-        tumor$weights <- subsetByOverlaps(normalDB$sd$weights, tumor)$weights
-    }
     if (!is.null(tumor$weights) && length(unique(tumor$weights)) > 1 ) {
         flog.info("Interval weights found, will use weighted PSCBS.")
     }

diff --git a/man/PureCN-defunct.Rd b/man/PureCN-defunct.Rd
@@ -12,6 +12,7 @@
   \itemize{
     \item{autoCurateResults: no replacement}
     \item{calculateGCContentByInterval: \code{\link{preprocessIntervals}}}
+    \item{calculateIntervalWeights: \code{\link{createNormalDatabase}}}
     \item{createExonWeightFile: \code{\link{createNormalDatabase}}}
     \item{createSNPBlacklist: \code{\link{setMappingBiasVcf}}}
     \item{createTargetWeights: \code{\link{createNormalDatabase}}}

diff --git a/man/PureCN-deprecated.Rd b/man/PureCN-deprecated.Rd
@@ -10,7 +10,7 @@
 \details{
   The following functions are deprecated and will be made defunct; use
   the replacement indicated below:
-  \itemize{
-    \item{calculateIntervalWeights: \code{\link{createNormalDatabase}}}
-  }
+%  \itemize{
+%
+%  }
 }
diff --git a/man/calculateGCContentByInterval.Rd b/man/calculateGCContentByInterval.Rd