Removed defunct copynumber dependency.

ddrichel · Apr 30, 2023 · 6835a04 · 6835a04
1 parent 8bec98b
commit 6835a04
Show file tree

Hide file tree

Showing 7 changed files with 33 additions and 177 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,8 +2,8 @@ Package: PureCN
 Type: Package
 Title: Copy number calling and SNV classification using
     targeted short read sequencing
-Version: 2.7.0
-Date: 2022-02-25
+Version: 2.7.1
+Date: 2023-04-30
 Authors@R: c(person("Markus", "Riester",
                     role = c("aut", "cre"),
                     email = "[email protected]",
@@ -52,7 +52,6 @@ Suggests:
     PSCBS,
     R.utils,
     TxDb.Hsapiens.UCSC.hg19.knownGene,
-    copynumber,
     covr,
     knitr,
     optparse,
@@ -70,4 +69,4 @@ biocViews: CopyNumberVariation, Software, Sequencing,
     VariantAnnotation, VariantDetection, Coverage, ImmunoOncology
 NeedsCompilation: no
 ByteCompile: yes
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.3
diff --git a/NEWS b/NEWS
@@ -1,3 +1,12 @@
+Changes in version 2.8.0
+------------------------
+
+SIGNIFICANT USER-VISIBLE CHANGES
+
+    o Make processMultipleSamples temporarily defunct because the 
+      copynumber packages was removed from Bioconductor
+
+
 Changes in version 2.2.0
 ------------------------
 

diff --git a/R/processMultipleSamples.R b/R/processMultipleSamples.R
@@ -3,6 +3,9 @@
 #' This function performs normalization and segmentation when multiple
 #' for the same patient are available.
 #'
+#' CURRENTLY DEFUNCT BECAUSE IT DEPENDS ON THE DEFUNCT COPYNUMBER PACKAGE.
+#' We are working on a replacement.
+#'
 #'
 #' @param tumor.coverage.files Coverage data for tumor samples.
 #' @param sampleids Sample ids, used in output files.
@@ -47,85 +50,15 @@
 #'
 #' normalDB <- createNormalDatabase(normal.coverage.files)
 #'
-#' seg <- processMultipleSamples(tumor.coverage.files,
-#'          sampleids = c("Sample1", "Sample2"),
-#'          normalDB = normalDB,
-#'          genome = "hg19")
+#' # seg <- processMultipleSamples(tumor.coverage.files,
+#' #         sampleids = c("Sample1", "Sample2"),
+#' #         normalDB = normalDB,
+#' #         genome = "hg19")
 #'
 #' @export processMultipleSamples
 processMultipleSamples <- function(tumor.coverage.files, sampleids, normalDB,
     num.eigen = 20, genome, plot.cnv = TRUE, w = NULL,
     min.interval.weight = 1 / 3,
     max.segments = NULL, chr.hash = NULL, centromeres = NULL, ...) {
-
-    if (!requireNamespace("copynumber", quietly = TRUE)) {
-        .stopUserError("processMultipleSamples requires the copynumber package.")
-    }
-    tumors <- lapply(.readNormals(tumor.coverage.files),
-        calculateTangentNormal, normalDB, num.eigen = num.eigen)
-
-    interval.weights <- NULL
-    intervalsUsed <- rep(TRUE, length(tumors[[1]]))
-    if (!is.null(normalDB$sd$weights) && !is.null(min.interval.weight)) {
-       interval.weights <- normalDB$sd$weights$weights
-       if (length(interval.weights) != length(tumors[[1]])) {
-            # should not happen because it is checked upstream
-            .stopRuntimeError("interval.weights and tumors does not align.")
-       }
-       flog.info("Interval weights found, but currently not supported by copynumber. %s",
-            "Will simply exclude intervals with low weight.")
-       lowWeightIntervals <- interval.weights < min.interval.weight
-       intervalsUsed[which(lowWeightIntervals)] <- FALSE
-    }
-    #MR: fix for missing chrX/Y
-    intervalsUsed[is.na(intervalsUsed)] <- FALSE
-    if (is.null(chr.hash)) chr.hash <- .getChrHash(seqlevels(tumors[[1]]))
-    intervalsUsed <- .filterIntervalsChrHash(intervalsUsed, tumors[[1]], chr.hash)
-    centromeres <- .getCentromerePositions(centromeres, genome,
-            if (is.null(tumors[[1]])) NULL else .getSeqlevelsStyle(tumors[[1]]))
-    if (is.null(centromeres)) {
-        .stopUserError("Cannot find centromeres for ", genome,
-            ". Provide them manually or select a supported genome.")
-    }
-    armLocations <- .getArmLocations(tumors[[1]], chr.hash, centromeres)
-    armLocationsGr <- GRanges(armLocations)
-    arms <- armLocationsGr$arm[findOverlaps(tumors[[1]], armLocationsGr, select = "first")]
-    lrs <- data.frame(do.call(cbind, lapply(tumors, function(x) unlist(x$log.ratio))))
-    colnames(lrs) <- sampleids
-    lrs <- data.frame(chrom = .strip.chr.name(seqnames(tumors[[1]]), chr.hash),
-                      pos = start(tumors[[1]]), lrs)
-    # ignore arms with only 2 or fewer probes
-    carms <- paste0(lrs[, 1], arms)
-    intervalsUsed <- as.logical(intervalsUsed & complete.cases(lrs)
-                                & !is.na(arms)) & table(carms)[carms] > 2
-
-    lrs <- lrs[intervalsUsed, ]
-    arms <- arms[intervalsUsed]
-    lrsw <- copynumber::winsorize(lrs, arms = arms, verbose = FALSE)
-    if (is.null(w)) {
-        w <- 1
-        dupr <- vapply(tumors, function(x)
-                       median(x[x$on.target]$duplication.rate, na.rm = TRUE),
-                       double(1))
-        if (!sum(is.na(dupr)) && min(dupr, na.rm = TRUE) > 0) {
-            w <- (1 / dupr)
-            w <- w / max(w)
-
-            flog.info("Setting weights by duplication rate. Lowest weight for %s (%.2f), heighest for %s.",
-                sampleids[which.min(w)], min(w), sampleids[which.max(w)])
-        }
-    }
-    lrsm <- copynumber::multipcf(lrsw, arms = arms, w = w, ...)
-    if (plot.cnv) {
-        copynumber::plotHeatmap(segments = lrsm, upper.lim = 1)
-        copynumber::plotGenome(lrsw, segments = lrsm, onefile = TRUE)
-    }
-    idx.enough.markers <- lrsm$n.probes > 1
-    rownames(lrsm) <- NULL
-    lrsm[idx.enough.markers, ]
-    #transform to DNAcopy format
-    m <- data.table::melt(data.table(lrsm), id.vars = 1:5)
-    m <- m[, c(6, 1, 3, 4, 5, 7)]
-    colnames(m) <- c("ID", "chrom", "loc.start", "loc.end", "num.mark", "seg.mean")
-    data.frame(m)
+    .Defunct(msg="preprocessMultipleSamples is temporarily defunct")
 }
diff --git a/man/filterVcfMuTect2.Rd b/man/filterVcfMuTect2.Rd
diff --git a/man/processMultipleSamples.Rd b/man/processMultipleSamples.Rd
diff --git a/tests/testthat/test_processMultipleSamples.R b/tests/testthat/test_processMultipleSamples.R
diff --git a/vignettes/PureCN.Rnw b/vignettes/PureCN.Rnw
@@ -1108,50 +1108,6 @@ It is highly recommended to compare the log2-ratios obtained by
 \Biocpkg{PureCN} and the third-party tool, since some pipelines automatically
 adjust log2-ratios for a default purity value. 
 
-\subsubsection{Multi-sample segmentation}
-\label{multisampleseg}
-When multiple biopsies from the same patient are available, it might be
-beneficial to use a multi-sample segmentation that attempts to find a single
-segmentation for all biopsies. The idea is to share information, most
-importantly from higher quality biopsies, and align breakpoints.
-\Biocpkg{PureCN} supports the \Rfunction{multipcf} segmentation from the
-\Biocpkg{copynumber} package:
-
-<<figuremultiplesamples, fig.show='hide', fig.width=6, fig.height=3>>=
-
-tumor2.coverage.file <- system.file("extdata", "example_tumor2.txt.gz",
-    package="PureCN")
-
-tumor.coverage.files <- c(tumor.coverage.file, tumor2.coverage.file)
-
-seg <- processMultipleSamples(tumor.coverage.files,
-         sampleids = c("Sample1", "Sample2"),
-         normalDB = normalDB,
-         genome = "hg19", verbose = FALSE)
-seg.file <- tempfile(fileext = ".seg")
-write.table(seg, seg.file, row.names = FALSE, sep = "\t")
-retMulti <- runAbsoluteCN(tumor.coverage.file = tumor.coverage.file, 
-    normal.coverage.file = pool, 
-    seg.file = seg.file, vcf.file = vcf.file, max.candidate.solutions = 1, 
-    fun.segmentation = segmentationHclust, plot.cnv = FALSE,
-    genome = "hg19", min.ploidy = 1.5, max.ploidy = 2.1, 
-    test.purity = seq(0.4, 0.7, by = 0.05), sampleid = "Sample1",
-    post.optimize = TRUE)
-@
-\incfig{figure/figuremultiplesamples-1}{0.75\textwidth}{\Biocpkg{copynumber} output
-via \Rfunction{processMultipleSamples}.}
-Again, the \Rcode{min.ploidy}, \Rcode{max.ploidy} and \Rcode{test.purity}
-arguments are set to reduce the runtime of this toy example and should not be
-used for real data. The \Rfunction{segmentationHclust} function clusters
-segments using B-allele frequencies and joins adjacent segments when they are in
-the same cluster. Providing the \Rfunction{calculateTangentNormal} output
-\Rcode{pool} via \Rcode{normal.coverage.file} gives \Rfunction{runAbsoluteCN}
-access to the copy number ratios of all intervals, not only the segment-level
-ones. This function also supports weighting samples. By default, when coverages
-were calculated using \Biocpkg{PureCN}, samples are weighted by the inverse of
-the read duplication rates. This usually dramatically reduces the number of
-spurious segments.
-
 \subsection{COSMIC annotation}
 \label{seccosmic}
 If a matched normal is not available, it is also helpful to provide
@@ -1527,10 +1483,8 @@ Figure~\ref{figure/figureexample2-1}:
 
 	\item This tool was originally designed for single sample pipelines. It
 becomes more common to have multiple biopsies per patient and sharing
-information across biopsies might be beneficial. While there is basic
-functionality for multi-sample segmentation (see \ref{multisampleseg}), we
-currently have no plans to support multiple biopsies in the likelihood model
-directly. Other tools worth checking out are lumosVar 2.0 \cite{Halperin2019}
+information across biopsies might be beneficial. 
+Other tools worth checking out are lumosVar 2.0 \cite{Halperin2019}
 and SuperFreq \cite{Flensburg2020}.
 \end{itemize}