Formatting of function names and code in manpages.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/PureCN@119236 bc3139a8-67e5-0310-9ffc-ced21a209358
ddrichel · Jul 9, 2016 · 566e4a3 · 566e4a3
1 parent 9aab87a
commit 566e4a3
Show file tree

Hide file tree

Showing 50 changed files with 269 additions and 192 deletions.
diff --git a/R/bootstrapResults.R b/R/bootstrapResults.R
@@ -33,7 +33,8 @@ function(# Filter unlikely purity/ploidy solutions
 ### Large-scale copy number artifacts can cause true purity/ploidy 
 ### solutions to rank low.
 res,
-### Return object of the runAbsoluteCN() function.
+### Return object of the \code{\link{runAbsoluteCN}} function.
+##seealso<< \code{\link{runAbsoluteCN}}
 n=500,
 ### Number of bootstrap replicates.
 top=2
@@ -43,10 +44,10 @@ top=2
     if (length(res$results) < 2) return(res)
     res$results <- .bootstrapResults(res$results, n=n, top=top)
     res
-### Returns the runAbsoluteCN object with low likelihood solutions
-### removed. Also adds a bootstrap value to each solution. This value is
-### the fraction of bootstrap replicates in which the solution ranked
-### first.                
+### Returns the \code{\link{runAbsoluteCN}} object with low 
+### likelihood solutions removed. Also adds a bootstrap value 
+### to each solution. This value is the fraction of bootstrap replicates 
+### in which the solution ranked first.                
 }, ex=function() {
 data(purecn.example.output)
 ret.boot <- bootstrapResults(purecn.example.output, n=100)

diff --git a/R/calculateBamCoverageByInterval.R b/R/calculateBamCoverageByInterval.R
@@ -1,15 +1,19 @@
 calculateBamCoverageByInterval <- structure(
 function(# Function to calculate coverage from BAM file
 ### Takes a BAM file and an interval file as input and 
-### returns coverage for each interval.
+### returns coverage for each interval. Coverage should be GC-normalized
+### using the \code{\link{correctCoverageBias}} function before determining
+### purity and ploidy with \code{\link{runAbsoluteCN}}.
+##seealso<< \code{\link{calculateGCContentByInterval} 
+## \link{correctCoverageBias} \link{runAbsoluteCN}}
 bam.file, 
 ### Filename of a BAM file.
 interval.file,
 ### File specifying the intervals. Interval is expected in 
 ### first column in format CHR:START-END. The gc.gene.file can be used.
 output.file=NULL
 ### Optionally, write minimal coverage file. Can be read with the
-### readCoverageGatk function.
+### \code{\link{readCoverageGatk}} function.
 ) {
     interval <- read.delim(interval.file, as.is=TRUE)
     colnames(interval)[1] <- "Target"

diff --git a/R/calculateGCContentByInterval.R b/R/calculateGCContentByInterval.R
@@ -1,6 +1,6 @@
 calculateGCContentByInterval <- structure(
 function(# Calculates GC content by interval
-### Uses scanFa from the Rsamtools package to retrieve GC 
+### Uses \code{scanFa} from the Rsamtools package to retrieve GC 
 ### content of intervals in a reference FASTA file.
 interval.file,
 ### File specifying the intervals. Interval is expected in 

diff --git a/R/callAlterations.R b/R/callAlterations.R
@@ -1,11 +1,12 @@
 callAlterations <- structure(
 function(# Calling of amplifications and deletions
 ### Function to extract major copy number alterations from a 
-### runAbsoluteCN return object.
+### \code{\link{runAbsoluteCN}} return object.
 res,
-### Return object of the runAbsoluteCN() function.
+### Return object of the \code{\link{runAbsoluteCN}} function.
+##seealso<< \code{\link{runAbsoluteCN}}
 id=1,
-### Candidate solutions to be used. id=1 will use the 
+### Candidate solutions to be used. \code{id=1} will use the 
 ### maximum likelihood (or curated) solution.
 cutoffs=c(0.5,6,7),
 ### Copy number cutoffs to call losses, focal amplifications 

diff --git a/R/callLOH.R b/R/callLOH.R
@@ -1,9 +1,10 @@
 callLOH <- structure(function(# Get regions of LOH
 ### This function provides detailed LOH information by region.
 res, 
-### Return object of the runAbsoluteCN() function.
+### Return object of the \code{\link{runAbsoluteCN}} function.
+##seealso<< \code{\link{runAbsoluteCN}}
 id=1, 
-### Candidate solution to extract LOH from. id=1 will  
+### Candidate solution to extract LOH from. \code{id=1} will  
 ### use the maximum likelihood solution.
 arm.cutoff=0.9
 ### Min fraction LOH on a chromosome arm to call 

diff --git a/R/correctCoverageBias.R b/R/correctCoverageBias.R
@@ -1,19 +1,22 @@
 correctCoverageBias <- structure(function(# Correct for GC bias
 ### Takes as input coverage data in GATK format (or data 
-### read by readCoverageGatk) and a mapping file for GC content, and then uses
-### a loess normalization for bias correction. Largely follows the GC 
-### correction of the TitanCNA package.
+### read by \code{\link{readCoverageGatk}}) and a mapping file for 
+### GC content, and then uses a loess normalization for bias correction. 
+### Largely follows the GC correction of the TitanCNA package.
 gatk.coverage.file, 
 ### Exon coverage file as produced by GATK. Either a file name
 ### or data parsed with the readCoverageGatk function.
 gc.gene.file,
 ### File providing GC content for each exon in the coverage files.
 ### First column in format CHR:START-END. Second column GC content (0 to 1). 
 ### Third column provides gene symbols, which are optional, but used in
-### runAbsoluteCN to generate gene level calls.
+### \code{\link{runAbsoluteCN}} to generate gene level calls. This file
+### can be generated with GATK GCContentByInterval tool or with the
+### \code{\link{calculateGCContentByInterval}} function.
+##seealso<< \code{\link{calculateGCContentByInterval}}
 output.file=NULL
 ### Optionally, write file with GC corrected coverage. Can be read with
-### the readCoverageGatk function.
+### the \code{\link{readCoverageGatk}} function.
 ) {
     if (is.character(gatk.coverage.file)) {
         tumor  <- readCoverageGatk(gatk.coverage.file)

diff --git a/R/createCurationFile.R b/R/createCurationFile.R
@@ -1,8 +1,10 @@
 createCurationFile <- structure(function(# Create file to curate PureCN results
 ### Function to create a CSV file that can be used to mark the correct solution
-### in the output of a runAbsoluteCN() run.
+### in the output of a \code{\link{runAbsoluteCN}} run.
 file.rds,
-### Output of the runAbsoluteCN() function, serialized with saveRDS()
+### Output of the \code{\link{runAbsoluteCN}} function, serialized 
+### with \code{saveRDS}.
+##seealso<< \code{\link{runAbsoluteCN}}
 overwrite.uncurated=TRUE
 ### Overwrite existing files unless flagged as "Curated".
 ) {
@@ -37,7 +39,7 @@ overwrite.uncurated=TRUE
     }
     invisible(d.f.curation)
 ###A data.frame with the tumor purity and ploidy of the maximum likelihood
-###solution 
+###solution.
 },ex=function() {
 data(purecn.example.output)
 file.rds <- 'Sample1_PureCN.rds'

diff --git a/R/createNormalDatabase.R b/R/createNormalDatabase.R
@@ -1,14 +1,17 @@
 createNormalDatabase <- structure(function(#Create database of normal samples
 ### Function to create a database of normal samples, used to find 
-### a good match for tumor copy number normalization.
+### a good match for tumor copy number normalization. Internally, this 
+### function determines the sex of the samples and trains a PCA
+### that is later used for clustering a tumor file with all normal samples 
+### in the database.
 gatk.normal.files,
 ### Vector with file names pointing to GATK coverage files 
 ### of normal samples. 
 sex=NULL,
 ### Vector of sex. "F" for female, "M" for male. If all chromosomes are diploid, specify "diploid". 
 ### If NULL determine from coverage.
 ...
-### Arguments passed to the prcomp function.
+### Arguments passed to the \code{prcomp} function.
 ) {
     gatk.normal.files <- normalizePath(gatk.normal.files)
     normals <- lapply(gatk.normal.files, readCoverageGatk)
@@ -35,6 +38,7 @@ sex=NULL,
             }    
         }    
     }
+##seealso<< \code{\link{findBestNormal}}
     list(
         gatk.normal.files=gatk.normal.files, 
         pca=normals.pca, 
@@ -44,8 +48,8 @@ sex=NULL,
         exon.log2.sd.coverage=apply(log2(normals.m+1),1,sd, na.rm=TRUE),
         sex=sex
     )
-### A normal database that can be used in the findBestNormal function to 
-### retrieve good matching normal samples for a given tumor sample.
+### A normal database that can be used in the \code{\link{findBestNormal}} 
+### function to retrieve good matching normal samples for a given tumor sample.
 },ex=function() {
 gatk.normal.file <- system.file("extdata", "example_normal.txt", 
     package="PureCN")
@@ -67,7 +71,7 @@ gatk.tumor.files,
 gatk.normal.files,
 ### A large number of GATK normal coverage samples (>20) 
 ### to estimate exon log-ratio standard deviations.
-### Should not overlap with files in gatk.tumor.files.
+### Should not overlap with files in \code{gatk.tumor.files}.
 exon.weight.file
 ### Output filename.
 ) {

diff --git a/R/createSNPBlacklist.R b/R/createSNPBlacklist.R
@@ -17,7 +17,8 @@ chr.hash=NULL,
 ### (e.g. chr1 to 1, chr2 to 2, etc.). If NULL, assume chromosomes
 ### are properly ordered.   
 genome="hg19"
-### Version of the reference genome, required for the readVcf() function.
+### Version of the reference genome, required for the \code{readVcf} 
+### function.
 ) {
     vcfs <- lapply(vcf.files, .readAndCheckVcf, genome)
     vcfs <- lapply(vcfs, function(x) x[info(x)$DB & 
@@ -79,11 +80,14 @@ genome="hg19"
     if (is.null(chr.hash)) chr.hash <- .getChrHash(d.f$seqnames)
 
     snp.bl.segmented <- snp.bl.segmented[order(.strip.chr.name(snp.bl.segmented$chrom, chr.hash)),]
-
-    list(snp.black.list=snp.bl, segmented=snp.bl.segmented[,-1])
-### A list with elements snp.black.list and segmented. 
-### "snp.black.list" is just a list of SNP ids.
-### "segmented" blacklists whole regions.
+
+    ##value<< A list with elements
+    list(
+        snp.blacklist=snp.bl, ##<< A data.frame with blacklisted SNPs.
+        segmented=snp.bl.segmented[,-1] ##<< A data.frame with blacklisted
+## regions.
+    )
+##end<<
 }, ex=function() {
 # Assume VCF files of normals (for example obtained by a MuTect artifact
 # detection run) are in directory poolofnormals:

diff --git a/R/curateResults.R b/R/curateResults.R
@@ -9,7 +9,8 @@ getDiploid <- structure(function(# Function to extract diploid solutions.
 ### likelihood solution is diploid, it is always returned; all other solutions
 ### must pass the more stringent criteria as defined in the function arguments.
 res, 
-### Return object of the runAbsoluteCN() function.
+### Return object of the \code{\link{runAbsoluteCN}} function.
+##seealso<< \code{\link{runAbsoluteCN}}
 min.diploid=0.5, 
 ### Minimum fraction of genome with normal copy number 2.
 min.single.gain.loss=0.05,
@@ -64,7 +65,8 @@ function(# Heuristics to find the best purity/ploidy solution.
 ### may evolve over time and might produce different rankings
 ### after PureCN updates.
 res,
-### Return object of the runAbsoluteCN() function.
+### Return object of the \code{\link{runAbsoluteCN}} function.
+##seealso<< \code{\link{runAbsoluteCN}}
 bootstrap=TRUE,
 ### Try to reduce the number of local optima by using the
 ### bootstrapResults function.
@@ -102,7 +104,7 @@ verbose=TRUE
         res$results <- c(res$results[ids], res$results[-ids])        
     }
     res    
-### The provided runAbsoluteCN return object with unlikely 
+### The provided \code{\link{runAbsoluteCN}} return object with unlikely 
 ### purity and ploidy solutions filtered out.
 }, ex=function() {
 data(purecn.example.output)

diff --git a/R/filterVcf.R b/R/filterVcf.R
@@ -1,7 +1,7 @@
 filterVcfBasic <-
 structure(function(#Basic VCF filter function
 ### Function to remove artifacts and low confidence/quality 
-### variant calls.
+### variant calls. 
 vcf, 
 ### CollapsedVCF object, read in with the readVcf function 
 ### from the VariantAnnotation package.
@@ -32,7 +32,8 @@ min.supporting.reads=NULL,
 ### of 10^-3.
 error=0.001,
 ### Estimated sequencing error rate. Used to calculate minimum
-### number of supporting reads using calculatePowerDetectSomatic.
+### number of supporting reads using \code{\link{calculatePowerDetectSomatic}}.
+##seealso<< \code{\link{calculatePowerDetectSomatic}}
 verbose=TRUE
 ) {
     flag <- NA
@@ -163,9 +164,10 @@ vcf.filtered <- filterVcfBasic(vcf)
 
 filterVcfMuTect <- structure(function(#Filter VCF MuTect
 ### Function to remove artifacts and low confidence/quality calls from
-### a MuTect generated VCF file.
+### a MuTect generated VCF file. Also applies filters defined in
+### \code{filterVcfBasic}.
 vcf, 
-### VCF object, read in with the readVcf function from the 
+### VCF object, read in with the \code{readVcf} function from the 
 ### VariantAnnotation package.
 tumor.id.in.vcf=NULL, 
 ### The tumor id in the VCF file, optional.
@@ -178,7 +180,8 @@ ignore=c("clustered_read_position", "fstar_tumor_lod", "nearby_gap_events",
 verbose=TRUE,
 ### Verbose output.
 ...
-### Additional arguments passed to filterVcfBasic
+### Additional arguments passed to \code{\link{filterVcfBasic}}.
+##seealso<< \code{\link{filterVcfBasic}}
 ){
     if (is.null(stats.file)) return(
         filterVcfBasic(vcf, tumor.id.in.vcf, verbose=verbose, ...))
@@ -220,7 +223,7 @@ setPriorVcf <- structure(function(# Set Somatic Prior VCF
 ### Function to set prior for somatic mutation status for each
 ### variant in the provided CollapsedVCF object.
 vcf,
-### CollapsedVCF object, read in with the readVcf function 
+### CollapsedVCF object, read in with the \code{readVcf} function 
 ### from the VariantAnnotation package.
 prior.somatic=c(0.5, 0.0005, 0.999, 0.0001, 0.995, 0.01), 
 ### Prior probabilities for somatic mutations. First value is for 

diff --git a/R/findBestNormal.R b/R/findBestNormal.R
@@ -5,22 +5,23 @@ gatk.tumor.file,
 ### GATK coverage file of a tumor sample.
 normalDB,
 ### Database of normal samples, created with 
-### createNormalDatabase().
+### \code{\link{createNormalDatabase}}.
+##seealso<< \code{\link{createNormalDatabase} \link{getSexFromCoverage}}
 pcs=1:3,
 ### Principal components to use for distance calculation.
 num.normals=1,
-### Return the num.normals best normals.
+### Return the \code{num.normals} best normals.
 ignore.sex=FALSE,
 ### If FALSE, detects sex of sample and returns best normals
 ### with matching sex.
 sex=NULL,
-### Sex of sample. If NULL, determine with getSexFromCoverage
+### Sex of sample. If NULL, determine with \code{\link{getSexFromCoverage}}
 ### and default parameters.
 ### Valid values are "F" for female, "M" for male. If all 
 ### chromosomes are diploid, specify "diploid". 
 gatk.normal.files=NULL,
 ### Only consider these normal samples. If NULL, use all in 
-### the database. Must match normalDB$gatk.normal.files. 
+### the database. Must match \code{normalDB$gatk.normal.files}. 
 verbose=TRUE
 ### Verbose output.
 ) {
@@ -82,16 +83,18 @@ gatk.best.normal.file <- findBestNormal(gatk.tumor.file, normalDB)
 plotBestNormal <- structure(
     function(#Plot the PCA of tumor and its best normal(s)
 ### This function can be used to understand how a best normal is chosen
-### by the findBestNormal function. It can be also used to tune the
-### best normal selection by finding good parameter values for
+### by the \code{\link{findBestNormal}} function. It can also be used 
+### to tune the best normal selection by finding good parameter values for
 ### num.normals and pcs.
 gatk.normal.files,
 ### GATK coverage file of normal files, typically identified via 
-### findBestNormal.
+### \code{\link{findBestNormal}}.
 gatk.tumor.file,
 ### GATK coverage file of a tumor sample.
 normalDB,
-### Database of normal samples, created with createNormalDatabase().
+### Database of normal samples, created with 
+### \code{\link{createNormalDatabase}}.
+##seealso<< \code{\link{createNormalDatabase} \link{findBestNormal}}
 x=1,
 ### PC to be plotted on x-axis.
 y=2,
@@ -119,7 +122,7 @@ col.other.normals="black",
         ifelse( normalDB$gatk.normal.files %in% 
         gatk.normal.files, col.best.normal, col.other.normals)),
         xlab=paste("PC",x), ylab=paste("PC",y),...)
-### Returns NULL
+### Returns NULL.
 },ex=function() {
 gatk.normal.file <- system.file("extdata", "example_normal.txt", 
     package="PureCN")

diff --git a/R/getSex.R b/R/getSex.R
@@ -5,7 +5,7 @@ getSexFromCoverage <- structure(function(# Get sample sex from coverage
 ### when sufficient sex marker genes such as AMELY are covered.
 ### For optimal results, parameters might need to be tuned for the assay.
 gatk.coverage, 
-### GATK coverage file or data read with readCoverageGatk.
+### GATK coverage file or data read with \code{\link{readCoverageGatk}}.
 min.ratio=25,
 ### Min chrX/chrY coverage ratio to call sample as female.
 min.ratio.na=20,
@@ -59,6 +59,7 @@ verbose=TRUE
                 " chr1-22: ",round(avg.autosome.coverage, digits=2),"."
         )
     }     
+##seealso<< \code{\link{getSexFromVcf}}
     if (XY.ratio > min.ratio) return("F")
     if (XY.ratio > min.ratio.na) return(NA)
     return("M") 
@@ -86,7 +87,7 @@ getSexFromVcf <- structure(function(# Get sample sex from a VCF file
 ### sanity check when a VCF is provided. It is also useful for determining
 ### sex when no sex marker genes on chrY (e.g. AMELY) are available.
 vcf,
-### CollapsedVCF object, read in with the readVcf function 
+### CollapsedVCF object, read in with the \code{readVcf} function 
 ### from the VariantAnnotation package.
 tumor.id.in.vcf=NULL, 
 ### The tumor id in the CollapsedVCF (optional).
@@ -142,6 +143,7 @@ verbose=TRUE
     if (res$estimate >= min.or.na) sex <- NA
     if (res$estimate >= min.or && res$p.value > max.pv) sex <- NA
     if (res$p.value <= max.pv && res$estimate >= min.or) sex <- "M"
+##seealso<< \code{\link{getSexFromCoverage}}
     if (verbose) { 
         message("Sex from VCF: ", sex, " (Fisher's p-value: ", 
             ifelse(res$p.value < 0.0001, "< 0.0001", 

diff --git a/R/plotAbs.R b/R/plotAbs.R
@@ -3,7 +3,8 @@ structure(function(# Plots for analyzing PureCN solutions
 ### This function provides various plots for finding correct 
 ### purity and ploidy combinations in the results of a runAbsoluteCN call.
 res, 
-### Return object of the runAbsoluteCN() function.
+### Return object of the \code{\link{runAbsoluteCN}} function.
+##seealso<< \code{\link{runAbsoluteCN}}
 ids=NULL, 
 ### Candidate solutions to be plotted. ids=1 will draw the 
 ### plot for the maximum likelihood solution.