Skip to content

Commit

Permalink
Removed defunct functions and arguments.
Browse files Browse the repository at this point in the history
  • Loading branch information
lima1 committed Jul 5, 2020
1 parent 55f1843 commit b5957a1
Show file tree
Hide file tree
Showing 18 changed files with 78 additions and 260 deletions.
3 changes: 0 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
export(annotateTargets)
export(bootstrapResults)
export(calculateBamCoverageByInterval)
export(calculateGCContentByInterval)
export(calculateIntervalWeights)
export(calculateLogRatio)
export(calculateMappingBiasGatk4)
Expand All @@ -19,9 +18,7 @@ export(callMutationBurden)
export(correctCoverageBias)
export(createCurationFile)
export(createNormalDatabase)
export(createTargetWeights)
export(filterIntervals)
export(filterTargets)
export(filterVcfBasic)
export(filterVcfMuTect)
export(filterVcfMuTect2)
Expand Down
4 changes: 3 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ NEW FEATURES

o Support for GATK4 GenomicsDB import for mapping bias calculation


SIGNIFICANT USER-VISIBLE CHANGES

o We now check if POP_AF or POPAF is -log10 scaled as new Mutect2 versions
Expand All @@ -16,6 +15,9 @@ SIGNIFICANT USER-VISIBLE CHANGES
o Updated Mutect2 failure flags: "strand_bias", "slippage", "weak_evidence",
"orientation", "haplotype"
o Removed defunct normal.panel.vcf.file from setMappingBiasVcf
o Removed defunct interval.weight.file from segmentationPSCBS,
segmentationCBS and processMultipleSamples
o Made calculateIntervalWeights defunct

BUGFIXES

Expand Down
117 changes: 3 additions & 114 deletions R/calculateIntervalWeights.R
Original file line number Diff line number Diff line change
@@ -1,122 +1,11 @@
#' Calculate interval weights
#'
#' Creates an interval weight file useful for segmentation. Requires a set of
#' coverage files from normal samples. Interval weights will be
#' set proportional to the inverse of coverage standard deviation across all
#' normals. Intervals with high variance in coverage in the pool of normals are
#' thus down-weighted.
#'
#' This function is now automatically called by \code{\link{createNormalDatabase}}
#' and is thus deprecated.
#' and is thus defunct.
#'
#' @param normalDB Database of normal samples, created with
#' \code{\link{createNormalDatabase}}.
#' @param interval.weight.file Output filename.
#' @param top.quantile Cap weight at the specified quantile. Intervals
#' with standard deviation smaller than this value won't have a higher weight
#' than intervals at this quantile.
#' @param plot Diagnostics plot, useful to tune parameters.
#' @param normal.coverage.files Deprecated.
#' @return A normalDB object with following slots added
#' \item{sd$log.ratios}{\code{GRanges} with all log.ratios.}
#' \item{sd$weights}{\code{GRanges} with interval weights.}
#' @author Markus Riester
#'
#' @export calculateIntervalWeights
calculateIntervalWeights <- function(normalDB,
interval.weight.file = NULL, top.quantile = 0.7, plot = FALSE,
normal.coverage.files = NULL) {
.Deprecated("createNormalDatabase")

# TODO, defunct in 1.18
old_method <- FALSE
if (!is.null(normal.coverage.files) && missing(normalDB)) {
flog.warn("normal.coverage.files is deprecated, provide normalDB instead.")
old_method = TRUE
} else if (class(normalDB) == "character" && all(sapply(normalDB, file.exists))) {
flog.warn("Providing normal coverage files is deprecated. Provide a normalDB instead.")
normal.coverage.files <- normalDB
old_method = TRUE
} else {
normal.coverage.files <- normalDB[["normal.coverage.files"]]
}
flog.info("Loading coverage data...")
normal.coverage <- lapply(normal.coverage.files, readCoverageFile)
.calculateIntervalWeights(normalDB, normal.coverage, interval.weight.file, top.quantile,
plot, old_method)
calculateIntervalWeights <- function() {
.Defunct("createNormalDatabase")
}

.calculateIntervalWeights <- function(normalDB, normal.coverage,
interval.weight.file = NULL, top.quantile = 0.7, plot = FALSE,
old_method = FALSE) {

tumor.coverage <- list(poolCoverage(normal.coverage,
w = rep(1, length(normal.coverage)) / length(normal.coverage)))

if (old_method) {
lrs <- lapply(tumor.coverage, function(tc) sapply(normal.coverage,
function(nc) calculateLogRatio(nc, tc)))
} else {
lrs <- lapply(normal.coverage, function(x) calculateTangentNormal(x, normalDB)$log.ratio)
}

lrs <- do.call(cbind, lrs)

lrs[is.infinite(lrs)] <- NA

intervals <- normal.coverage[[1]]
mcols(intervals) <- NULL

lrs.sd <- apply(lrs, 1, sd, na.rm = TRUE)
lrs.cnt.na <- apply(lrs, 1, function(x) sum(is.na(x)))
# get the top.quantile % of sd by chromosome and use this to normalize weight=1
chrom <- as.character(seqnames(intervals))
sdCutoffByChr <- sapply(split(lrs.sd, chrom), quantile, probs = top.quantile,
names = FALSE, na.rm = TRUE)[chrom]

zz <- sdCutoffByChr / lrs.sd
zz[zz > 1] <- 1
idx <- is.na(zz) | lrs.cnt.na > ncol(lrs) / 3
zz[idx] <- min(zz, na.rm = TRUE)

ret <- list(
log.ratios = GRanges(intervals,,, DataFrame(lrs)),
weights = GRanges(intervals,,, DataFrame(weights = zz)))

if (!is.null(interval.weight.file)) {
ret_output <- data.frame(
Target = as.character(ret$weights),
weights = ret$weights$weights)

fwrite(ret_output, file = interval.weight.file, row.names = FALSE,
quote = FALSE, sep = "\t")
}
if (plot) .plotIntervalWeights(lrs.sd, width(tumor.coverage[[1]]),
tumor.coverage[[1]]$on.target)
if (old_method) return(NULL)
normalDB$sd <- ret
normalDB
}

#' Calculate target weights
#'
#' This function is defunct, use \code{\link{calculateIntervalWeights}}
#' instead.
#'
#' @author Markus Riester
#'
#' @export createTargetWeights
createTargetWeights <- function() {
.Defunct("calculateIntervalWeights")
}

.plotIntervalWeights <- function(lrs.sd, width, on.target) {
par(mfrow = c(1, 2))
plot(width[on.target], lrs.sd[on.target], ylim = c(0,2),
xlab = "Interval Width", ylab = "log2 ratio sd.", main = "On-Target")
if (sum(!on.target)) {
plot(width[!on.target], lrs.sd[!on.target], col = "red",
ylim = c(0, 2), xlab = "Interval Width", ylab = "log2 ratio sd.",
main = "Off-Target")
}
}
63 changes: 63 additions & 0 deletions R/createNormalDatabase.R
Original file line number Diff line number Diff line change
Expand Up @@ -352,3 +352,66 @@ min.coverage, max.missing) {

intervals.used
}

.calculateIntervalWeights <- function(normalDB, normal.coverage,
interval.weight.file = NULL, top.quantile = 0.7, plot = FALSE,
old_method = FALSE) {

tumor.coverage <- list(poolCoverage(normal.coverage,
w = rep(1, length(normal.coverage)) / length(normal.coverage)))

if (old_method) {
lrs <- lapply(tumor.coverage, function(tc) sapply(normal.coverage,
function(nc) calculateLogRatio(nc, tc)))
} else {
lrs <- lapply(normal.coverage, function(x) calculateTangentNormal(x, normalDB)$log.ratio)
}

lrs <- do.call(cbind, lrs)

lrs[is.infinite(lrs)] <- NA

intervals <- normal.coverage[[1]]
mcols(intervals) <- NULL

lrs.sd <- apply(lrs, 1, sd, na.rm = TRUE)
lrs.cnt.na <- apply(lrs, 1, function(x) sum(is.na(x)))
# get the top.quantile % of sd by chromosome and use this to normalize weight=1
chrom <- as.character(seqnames(intervals))
sdCutoffByChr <- sapply(split(lrs.sd, chrom), quantile, probs = top.quantile,
names = FALSE, na.rm = TRUE)[chrom]

zz <- sdCutoffByChr / lrs.sd
zz[zz > 1] <- 1
idx <- is.na(zz) | lrs.cnt.na > ncol(lrs) / 3
zz[idx] <- min(zz, na.rm = TRUE)

ret <- list(
log.ratios = GRanges(intervals,,, DataFrame(lrs)),
weights = GRanges(intervals,,, DataFrame(weights = zz)))

if (!is.null(interval.weight.file)) {
ret_output <- data.frame(
Target = as.character(ret$weights),
weights = ret$weights$weights)

fwrite(ret_output, file = interval.weight.file, row.names = FALSE,
quote = FALSE, sep = "\t")
}
if (plot) .plotIntervalWeights(lrs.sd, width(tumor.coverage[[1]]),
tumor.coverage[[1]]$on.target)
if (old_method) return(NULL)
normalDB$sd <- ret
normalDB
}

.plotIntervalWeights <- function(lrs.sd, width, on.target) {
par(mfrow = c(1, 2))
plot(width[on.target], lrs.sd[on.target], ylim = c(0,2),
xlab = "Interval Width", ylab = "log2 ratio sd.", main = "On-Target")
if (sum(!on.target)) {
plot(width[!on.target], lrs.sd[!on.target], col = "red",
ylim = c(0, 2), xlab = "Interval Width", ylab = "log2 ratio sd.",
main = "Off-Target")
}
}
9 changes: 0 additions & 9 deletions R/filterIntervals.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,6 @@ filterIntervals <- function(normal, tumor, log.ratio, seg.file,
return(intervalsUsed)
}

#' Remove low quality targets
#'
#' This function is defunct and was renamed to
#' \code{\link{filterIntervals}}.
#' @export filterTargets
filterTargets <- function() {
.Defunct("filterIntervals")
}

.checkNormalDB <- function(tumor, normalDB) {
if (!is(normalDB, "list")) {
.stopUserError("normalDB not a valid normalDB object. ",
Expand Down
9 changes: 0 additions & 9 deletions R/preprocessIntervals.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,15 +140,6 @@ preprocessIntervals <- function(interval.file, reference.file,
invisible(interval.gr)
}

#' Calculates GC content by interval
#'
#' This function was renamed to \code{\link{preprocessIntervals}}.
#'
#' @export calculateGCContentByInterval
calculateGCContentByInterval <- function() {
.Defunct("preprocessIntervals")
}

# this function removes short chromosomes that have no probes (mainly a
# general way to remove chrM)
.dropShortUntargetedSeqLevels <- function(offRegions, interval.gr, minSize) {
Expand Down
24 changes: 1 addition & 23 deletions R/processMultipleSamples.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
#' @param genome Genome version, for example hg19. Needed to get centromere
#' positions.
#' @param plot.cnv Segmentation plots.
#' @param interval.weight.file Deprecated. For \code{normalDB} objects generated
#' with PureCN versions older than 1.16, re-run \code{\link{createNormalDatabase}}.
#' @param min.interval.weight Can be used to ignore intervals with low weights.
#' @param w Weight of samples. Can be used to downweight poor quality samples.
#' If \code{NULL}, sets to inverse of median on-target duplication rate if
Expand Down Expand Up @@ -57,7 +55,7 @@
#' @export processMultipleSamples
processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB,
num.eigen = 20, genome, plot.cnv = TRUE, w = NULL,
interval.weight.file = NULL, min.interval.weight = 1/3,
min.interval.weight = 1/3,
max.segments = NULL, chr.hash = NULL, centromeres = NULL, ...) {

if (!requireNamespace("copynumber", quietly = TRUE)) {
Expand All @@ -69,11 +67,6 @@ processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB,

interval.weights <- NULL
intervalsUsed <- rep(TRUE, length(tumors[[1]]))
# TODO defunct in 1.18
if (!is.null(interval.weight.file) &&
!is.null(min.interval.weight)) {
normalDB <- .add_weights_to_normaldb(interval.weight.file, normalDB)
}
if (!is.null(normalDB$sd$weights) && !is.null(min.interval.weight)) {
interval.weights <- normalDB$sd$weights$weights
if (length(interval.weights) != length(tumors[[1]])) {
Expand Down Expand Up @@ -134,18 +127,3 @@ processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB,
colnames(m) <- c("ID", "chrom", "loc.start", "loc.end", "num.mark", "seg.mean")
data.frame(m)
}

.add_weights_to_normaldb <- function(interval.weight.file, normalDB = NULL) {
flog.warn("interval.weight.file is deprecated.")
if (!is.null(normalDB$sd$weights)) return(normalDB)

interval.weights <- read.delim(interval.weight.file, as.is = TRUE)
if (!is.null(normalDB)) {
interval.weights <- interval.weights[match(normalDB$intervals,
interval.weights[,1]),]
}
weights <- GRanges(interval.weights[,1],,, DataFrame(weights = interval.weights[,2]))
if (is.null(normalDB)) normalDB <- list()
normalDB$sd$weights <- weights
normalDB
}
8 changes: 1 addition & 7 deletions R/segmentationCBS.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#' ignores this user provided segmentation.
#' @param plot.cnv Segmentation plots.
#' @param sampleid Sample id, used in output files.
#' @param interval.weight.file Deprecated.
#' @param weight.flag.pvalue Flag values with one-sided p-value smaller than
#' this cutoff.
#' @param alpha Alpha value for CBS, see documentation for the \code{segment}
Expand Down Expand Up @@ -72,18 +71,13 @@
#' @export segmentationCBS
#' @importFrom stats t.test hclust cutree dist
segmentationCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
sampleid, interval.weight.file = NULL, weight.flag.pvalue = 0.01, alpha = 0.005,
sampleid, weight.flag.pvalue = 0.01, alpha = 0.005,
undo.SD = NULL, vcf = NULL, tumor.id.in.vcf = 1, normal.id.in.vcf = NULL,
max.segments = NULL, prune.hclust.h = NULL, prune.hclust.method = "ward.D",
chr.hash = NULL, centromeres = NULL) {

if (is.null(chr.hash)) chr.hash <- .getChrHash(seqlevels(tumor))

# TODO defunct in 1.18
if (!is.null(interval.weight.file)) {
normalDB <- .add_weights_to_normaldb(interval.weight.file)
tumor$weights <- subsetByOverlaps(normalDB$sd$weights, tumor)$weights
}
if (!is.null(tumor$weights) && length(unique(tumor$weights)) > 1 ) {
flog.info("Interval weights found, will use weighted CBS.")
}
Expand Down
9 changes: 1 addition & 8 deletions R/segmentationPSCBS.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
#' ignores this user provided segmentation.
#' @param plot.cnv Segmentation plots.
#' @param sampleid Sample id, used in output files.
#' @param interval.weight.file Can be used to assign weights to intervals.
#' Currently requires patched version of PSCBS. Deprecated.
#' @param weight.flag.pvalue Flag values with one-sided p-value smaller than
#' this cutoff.
#' @param alpha Alpha value for CBS, see documentation for the \code{segment}
Expand Down Expand Up @@ -77,7 +75,7 @@
#'
#' @export segmentationPSCBS
segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,
sampleid, interval.weight.file = NULL, weight.flag.pvalue = 0.01, alpha = 0.005,
sampleid, weight.flag.pvalue = 0.01, alpha = 0.005,
undo.SD = NULL, flavor = "tcn&dh", tauA = 0.03, vcf = NULL,
tumor.id.in.vcf = 1, normal.id.in.vcf = NULL, max.segments = NULL,
prune.hclust.h = NULL, prune.hclust.method = "ward.D", chr.hash = NULL,
Expand All @@ -89,11 +87,6 @@ segmentationPSCBS <- function(normal, tumor, log.ratio, seg, plot.cnv,

if (is.null(chr.hash)) chr.hash <- .getChrHash(seqlevels(tumor))

# TODO defunct in 1.18
if (!is.null(interval.weight.file)) {
normalDB <- .add_weights_to_normaldb(interval.weight.file)
tumor$weights <- subsetByOverlaps(normalDB$sd$weights, tumor)$weights
}
if (!is.null(tumor$weights) && length(unique(tumor$weights)) > 1 ) {
flog.info("Interval weights found, will use weighted PSCBS.")
}
Expand Down
1 change: 1 addition & 0 deletions man/PureCN-defunct.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
\itemize{
\item{autoCurateResults: no replacement}
\item{calculateGCContentByInterval: \code{\link{preprocessIntervals}}}
\item{calculateIntervalWeights: \code{\link{createNormalDatabase}}}
\item{createExonWeightFile: \code{\link{createNormalDatabase}}}
\item{createSNPBlacklist: \code{\link{setMappingBiasVcf}}}
\item{createTargetWeights: \code{\link{createNormalDatabase}}}
Expand Down
6 changes: 3 additions & 3 deletions man/PureCN-deprecated.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
\details{
The following functions are deprecated and will be made defunct; use
the replacement indicated below:
\itemize{
\item{calculateIntervalWeights: \code{\link{createNormalDatabase}}}
}
% \itemize{
%
% }
}
11 changes: 0 additions & 11 deletions man/calculateGCContentByInterval.Rd

This file was deleted.

Loading

0 comments on commit b5957a1

Please sign in to comment.