Version bump to make next stable 2.0; linting of some older code.

ddrichel · Sep 19, 2021 · 346ab89 · 346ab89
1 parent eb54df4
commit 346ab89
Show file tree

Hide file tree

Showing 27 changed files with 484 additions and 487 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,8 +2,8 @@ Package: PureCN
 Type: Package
 Title: Copy number calling and SNV classification using
     targeted short read sequencing
-Version: 1.23.30
-Date: 2021-09-18
+Version: 1.99.31
+Date: 2021-09-19
 Authors@R: c(person("Markus", "Riester",
                     role = c("aut", "cre"),
                     email = "[email protected]",

diff --git a/NEWS b/NEWS
@@ -1,5 +1,5 @@
-Changes in version 1.24.0
--------------------------
+Changes in version 2.0.0
+------------------------
 
 NEW FEATURES
 
@@ -26,7 +26,7 @@ SIGNIFICANT USER-VISIBLE CHANGES
         still very small (< 0.15, min.logr.sdev in runAbsoluteCN)
       - Increase automatically determined undo.SD in all segmentation functions
         when noise is very small (< min.logr.sdev)
-      - min.logr.sdev is now accessible in PureCN.R via --minlogrsdev
+      - min.logr.sdev is now accessible in PureCN.R via --min-logr-sdev
     o Added pairwise sample distances to normalDB output object helpful for
       finding noisy samples or batches in normal databases  
     o Do not error out readCurationFile when CSV is missing and directory

diff --git a/R/bootstrapResults.R b/R/bootstrapResults.R
@@ -1,16 +1,16 @@
 #' Bootstrapping variant fits
 #'
-#' This function bootstraps variants, then optionally re-ranks solutions by 
+#' This function bootstraps variants, then optionally re-ranks solutions by
 #' using the bootstrap estimate of the likelihood score, and then optionally
-#' removes solutions that never ranked high in any bootstrap replicate. 
+#' removes solutions that never ranked high in any bootstrap replicate.
 #'
 #'
 #' @param res Return object of the \code{\link{runAbsoluteCN}} function.
 #' @param n Number of bootstrap replicates.
 #' @param top Include solution if it appears in the top \code{n} solutions of
 #' any bootstrap replicate. If \code{NULL}, do not filter solutions.
 #' @param reorder Reorder results by bootstrap value.
-#' @return Returns a \code{\link{runAbsoluteCN}} object with added bootstrap 
+#' @return Returns a \code{\link{runAbsoluteCN}} object with added bootstrap
 #' value to each solution. This value
 #' is the fraction of bootstrap replicates in which the solution ranked first.
 #' @author Markus Riester
@@ -25,8 +25,8 @@
 #' @importFrom utils head
 bootstrapResults <- function(res, n = 500, top = NULL, reorder = FALSE) {
     if (length(res$results) < 2) return(res)
-    if (is.null(top)) top <- length(res$results)    
-    res$results <- .bootstrapResults(res$results, n = n, top = top, 
+    if (is.null(top)) top <- length(res$results)
+    res$results <- .bootstrapResults(res$results, n = n, top = top,
         reorder = reorder)
     res
 }
@@ -56,7 +56,7 @@ bootstrapResults <- function(res, n = 500, top = NULL, reorder = FALSE) {
     if (reorder) {
         results <- results[order(sapply(results, function(x) x$bootstrap.value),
             decreasing = TRUE)]
-    }    
+    }
     .flagBootstrap(results)
 }
 

diff --git a/R/callAlterations.R b/R/callAlterations.R
@@ -1,9 +1,9 @@
 #' Calling of amplifications and deletions
-#' 
+#'
 #' Function to extract major copy number alterations from a
 #' \code{\link{runAbsoluteCN}} return object.
-#' 
-#' 
+#'
+#'
 #' @param res Return object of the \code{\link{runAbsoluteCN}} function.
 #' @param id Candidate solutions to be used. \code{id=1} will use the maximum
 #' likelihood (or curated) solution.
@@ -20,33 +20,34 @@
 #' @author Markus Riester
 #' @seealso \code{\link{runAbsoluteCN}}
 #' @examples
-#' 
+#'
 #' data(purecn.example.output)
 #' callAlterations(purecn.example.output)
 #' callAlterations(purecn.example.output, all.genes=TRUE)["ESR2",]
-#' 
+#'
 #' @export callAlterations
 callAlterations <- function(res, id = 1, cutoffs = c(0.5, 6, 7),
-log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) {
+                            log.ratio.cutoffs = c(-0.9, 0.9),
+                            failed = NULL, all.genes = FALSE) {
 
     if (!is(res$results[[id]]$gene.calls, "data.frame")) {
         .stopUserError("This function requires gene-level calls.\n",
             "Please add a column 'Gene' containing gene symbols to the ",
             "interval.file.")
     }
-     
+
     amp.ids <- (res$results[[id]]$gene.calls$focal &
                 res$results[[id]]$gene.calls$C >= cutoffs[2]) |
-                res$results[[id]]$gene.calls$C >= cutoffs[3] 
+                res$results[[id]]$gene.calls$C >= cutoffs[3]
 
     del.ids <- res$results[[id]]$gene.calls$C < cutoffs[1]
 
     if (is.null(failed)) failed <- res$results[[id]]$failed
 
     if (failed) {
-        amp.ids <- res$results[[id]]$gene.calls$gene.mean >= 
-            log.ratio.cutoffs[2] 
-        del.ids <- res$results[[id]]$gene.calls$gene.mean < 
+        amp.ids <- res$results[[id]]$gene.calls$gene.mean >=
+            log.ratio.cutoffs[2]
+        del.ids <- res$results[[id]]$gene.calls$gene.mean <
             log.ratio.cutoffs[1]
     }
 
@@ -58,30 +59,29 @@ log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) {
     bm <- res$results[[id]]$SNV.posterior
     if (!is.null(bm)) {
         segids <- bm$posteriors$seg.id
-        calls$num.snps <- sapply(calls$seg.id, function(i) 
-            sum(segids==i,na.rm=TRUE))
-        calls$M <- bm$posteriors$ML.M.SEGMENT[match(calls$seg.id, segids)] 
-        calls$M.flagged <- bm$posteriors$M.SEGMENT.FLAGGED[match(calls$seg.id, segids)] 
-        calls$loh <- bm$posteriors$ML.M.SEGMENT[match(calls$seg.id, segids)] == 0 
+        calls$num.snps <- sapply(calls$seg.id, function(i)
+            sum(segids == i, na.rm = TRUE))
+        calls$M <- bm$posteriors$ML.M.SEGMENT[match(calls$seg.id, segids)]
+        calls$M.flagged <- bm$posteriors$M.SEGMENT.FLAGGED[match(calls$seg.id, segids)]
+        calls$loh <- bm$posteriors$ML.M.SEGMENT[match(calls$seg.id, segids)] == 0
     }
-    
-    calls <- calls[, !grepl("^\\.",colnames(calls))]
-       
+
+    calls <- calls[, !grepl("^\\.", colnames(calls))]
+
     if (!all.genes) {
-        return(calls[!is.na(calls$type),])
+        return(calls[!is.na(calls$type), ])
     }
     calls
 }
 
-
 #' Calling of amplifications and deletions from segmentations
-#' 
+#'
 #' This function can be used to obtain gene-level copy number calls from
 #' segmentations. This is useful for comparing PureCN's segmentations with
 #' segmentations obtained by different tools on the gene-level.  Segmentation
 #' file can contain multiple samples.
-#' 
-#' 
+#'
+#'
 #' @param sampleid The sampleid column in the segmentation file.
 #' @param chr The chromosome column.
 #' @param start The start positions of the segments.
@@ -92,7 +92,7 @@ log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) {
 #' @param interval.file A mapping file that assigns GC content and gene symbols
 #' to each exon in the coverage files. Used for generating gene-level calls.
 #' First column in format CHR:START-END. Second column GC content (0 to 1).
-#' Third column gene symbol. This file is generated with the 
+#' Third column gene symbol. This file is generated with the
 #' \code{\link{preprocessIntervals}} function.
 #' @param fun.focal Function for identifying focal amplifications. Defaults to
 #' \code{\link{findFocal}}.
@@ -102,29 +102,29 @@ log.ratio.cutoffs = c(-0.9, 0.9), failed = NULL, all.genes = FALSE) {
 #' one for each sample.
 #' @author Markus Riester
 #' @examples
-#' 
+#'
 #' data(purecn.example.output)
 #' seg <- purecn.example.output$results[[1]]$seg
-#' interval.file <- system.file("extdata", "example_intervals.txt", 
+#' interval.file <- system.file("extdata", "example_intervals.txt",
 #'         package = "PureCN")
-#' 
-#' calls <- callAlterationsFromSegmentation(sampleid=seg$ID, chr=seg$chrom,
-#'     start=seg$loc.start, end=seg$loc.end, num.mark=seg$num.mark,
-#'     seg.mean=seg$seg.mean, C=seg$C, interval.file=interval.file)
-#' 
+#'
+#' calls <- callAlterationsFromSegmentation(sampleid = seg$ID, chr = seg$chrom,
+#'     start = seg$loc.start, end = seg$loc.end, num.mark = seg$num.mark,
+#'     seg.mean = seg$seg.mean, C = seg$C, interval.file = interval.file)
+#'
 #' @export callAlterationsFromSegmentation
-callAlterationsFromSegmentation <- function(sampleid, chr, start, end, 
-    num.mark = NA, seg.mean, C, interval.file, fun.focal=findFocal,
-    args.focal=list(), ...){
+callAlterationsFromSegmentation <- function(sampleid, chr, start, end,
+    num.mark = NA, seg.mean, C, interval.file, fun.focal = findFocal,
+    args.focal = list(), ...) {
     seg <- data.frame(
-        ID=sampleid,
-        chrom=chr,
-        loc.start=start,
-        loc.end=end,
-        num.mark=num.mark,
-        seg.mean=seg.mean
-    )    
-    seg.adjusted <- data.frame(seg, 
+        ID = sampleid,
+        chrom = chr,
+        loc.start = start,
+        loc.end = end,
+        num.mark = num.mark,
+        seg.mean = seg.mean
+    )
+    seg.adjusted <- data.frame(seg,
                                C = C,
                                weight.flagged = NA,
                                size = seg$loc.end - seg$loc.start + 1)
@@ -133,9 +133,11 @@ callAlterationsFromSegmentation <- function(sampleid, chr, start, end,
     chr.hash <- .getChrHash(seqlevels(tumor))
     segs <- split(seg.adjusted, seg$ID)
     gene.calls <- lapply(segs, function(s) {
-        log.ratio <- .createFakeLogRatios(tumor, s[,1:6], s$ID[1], chr.hash)
+        log.ratio <- .createFakeLogRatios(tumor, s[, 1:6], s$ID[1], chr.hash)
         .getGeneCalls(s, tumor, log.ratio, fun.focal, args.focal, chr.hash)
     })
-    res <- lapply(gene.calls, function(x) list(results=list(list(gene.calls=x, failed=FALSE))))
+    res <- lapply(gene.calls, function(x) list(
+        results = list(list(gene.calls = x, failed = FALSE))
+    ))
     lapply(res, callAlterations, ...)
-}    
+}
diff --git a/R/callCIN.R b/R/callCIN.R
@@ -1,8 +1,8 @@
 #' Call Chromosomal Instability
-#' 
+#'
 #' This function provides detailed CIN information.
-#' 
-#' 
+#'
+#'
 #' @param res Return object of the \code{\link{runAbsoluteCN}} function.
 #' @param id Candidate solution to extract CIN from. \code{id=1} will use the
 #' maximum likelihood solution.
@@ -18,12 +18,12 @@
 #' @author Markus Riester
 #' @seealso \code{\link{runAbsoluteCN}}
 #' @examples
-#' 
+#'
 #' data(purecn.example.output)
 #' head(callCIN(purecn.example.output))
-#' 
+#'
 #' @export callCIN
-callCIN <- function(res, id = 1, allele.specific = TRUE, reference.state = 
+callCIN <- function(res, id = 1, allele.specific = TRUE, reference.state =
                     c("dominant", "normal")) {
     loh <- callLOH(res, id)
     loh$size <- loh$end - loh$start + 1
@@ -33,7 +33,7 @@ callCIN <- function(res, id = 1, allele.specific = TRUE, reference.state =
     reference.state <- match.arg(reference.state)
     loh$state <- if (allele.specific) paste0(loh$C, "/", loh$M) else loh$C
     dominant.state <-  sort(sapply(split(loh$size, loh$state), sum),
-                            decreasing=TRUE)[1]
+                            decreasing = TRUE)[1]
     reference.state.cn <- names(dominant.state)
     if (reference.state == "normal") {
         reference.state.cn <- if (allele.specific) "2/1" else "2"