Skip to content

Commit

Permalink
Added centromere positions.
Browse files Browse the repository at this point in the history
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/PureCN@119121 bc3139a8-67e5-0310-9ffc-ced21a209358
  • Loading branch information
lima1 committed Jul 4, 2016
1 parent 020c022 commit f108863
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 4 deletions.
17 changes: 15 additions & 2 deletions R/runAbsoluteCN.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ vcf.file=NULL,
### Again, do not expect very useful results without a VCF file.
genome,
### Genome version, required for the readVcf function.
centromeres=NULL,
### Data.frame with centromere positions in first three columns.
### If NULL, use pre-stored positions for genome versions hg18, hg19 and hg38.
sex=c("?","F","M","diploid"),
### Sex of sample. If ?, detect using getSexFromCoverage function and default
### parameters.
Expand Down Expand Up @@ -164,6 +167,14 @@ post.optimize=FALSE,
message("Default of genome=hg19 is deprecated. Please specify genome.")
}

if (is.null(centromeres)) {
data(centromeres, envir = environment())
if (genome %in% names(centromeres)) {
centromeres <- centromeres[[genome]]
} else {
centromeres <- NULL
}
}
# argument checking
.checkParameters(test.purity, min.ploidy, max.ploidy, max.non.clonal)

Expand Down Expand Up @@ -811,8 +822,10 @@ post.optimize=FALSE,
log.ratio.sdev=sd.seg,
vcf=vcf,
sampleid=sampleid,
sex=sex, sex.vcf=sex.vcf,
chr.hash=chr.hash)
sex=sex,
sex.vcf=sex.vcf,
chr.hash=chr.hash,
centromeres=centromeres)
)
##end<<
},ex=function(){
Expand Down
Binary file added data/centromeres.rda
Binary file not shown.
31 changes: 31 additions & 0 deletions inst/extdata/downloadCentromeres.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
library(rtracklayer)
library(data.table)
library(PureCN)

# Load the chr.hash lookup table; its `chr` and `number` columns are used
# below to put chromosomes into numerical order.
data(chr.hash)

ucsc.session <- browserSession("UCSC")
genome.builds <- c("hg18", "hg19", "hg38")

# Fetch the table rows describing centromeres for one genome build.
# hg38 is queried from the "Centromeres" track; every other build is queried
# from the "Gap" track and reduced to the rows whose type is "centromere".
.fetchCentromereRows <- function(session, build) {
    if (build != "hg38") {
        gap.rows <- getTable(ucscTableQuery(session, track="Gap",
            table="gap"))
        return(gap.rows[gap.rows$type=="centromere",])
    }
    getTable(ucscTableQuery(session, track="Centromeres",
        table="centromeres"))
}

# Collapse all rows of a chromosome into a single interval spanning the
# smallest chromStart to the largest chromEnd.
.collapseByChromosome <- function(rows) {
    rows.dt <- data.table(rows)
    as.data.frame(rows.dt[,
        list(chromStart=min(chromStart), chromEnd=max(chromEnd)), by=chrom])
}

per.build <- list()
for (build in genome.builds) {
    # Point the UCSC session at the current build before querying it.
    genome(ucsc.session) <- build
    per.build[[build]] <- .collapseByChromosome(
        .fetchCentromereRows(ucsc.session, build))
}

# Order each table by the numeric chromosome index from chr.hash and keep
# only the first three columns.
centromeres <- lapply(per.build, function(tbl) {
    chrom.rank <- chr.hash$number[match(tbl$chrom, chr.hash$chr)]
    tbl[order(chrom.rank), 1:3]
})

save(centromeres, file="data/centromeres.rda", compress="xz")
28 changes: 28 additions & 0 deletions man/centromeres.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
\name{centromeres}
\alias{centromeres}
\docType{data}
\title{
A list of data.frames containing centromere positions.
}
\description{
A list of data.frames containing centromere positions for hg18, hg19 and hg38.
Downloaded from the UCSC genome browser.
}
\usage{data(centromeres)}
\value{
A list with three data frames, "hg18", "hg19", and "hg38". Each contains
three columns:
\describe{
\item{\code{chrom}}{a factor with levels \code{chr1} \code{chr10} \code{chr11} \code{chr12} \code{chr13} \code{chr14} \code{chr15} \code{chr16} \code{chr17} \code{chr18} \code{chr19} \code{chr2} \code{chr20} \code{chr21} \code{chr22} \code{chr3} \code{chr4} \code{chr5} \code{chr6} \code{chr7} \code{chr8} \code{chr9} \code{chrX} \code{chrY}}
\item{\code{chromStart}}{a numeric vector}
\item{\code{chromEnd}}{a numeric vector}
}
}
\references{
The script downloadCentromeres.R in the extdata directory was used to generate
the data.frames.
}
\examples{
data(centromeres)
}
\keyword{datasets}
6 changes: 4 additions & 2 deletions man/runAbsoluteCN.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ Returns purity and ploidy combinations, sorted by likelihood score.
Provides copy number and LOH data, by both gene and genomic region. }
\usage{runAbsoluteCN(gatk.normal.file = NULL, gatk.tumor.file,
log.ratio = NULL, seg.file = NULL, seg.file.sdev = 0.4,
vcf.file = NULL, genome, sex = c("?", "F", "M",
"diploid"), fun.filterVcf = filterVcfMuTect,
vcf.file = NULL, genome, centromeres = NULL, sex = c("?",
"F", "M", "diploid"), fun.filterVcf = filterVcfMuTect,
args.filterVcf = list(), fun.setPriorVcf = setPriorVcf,
args.setPriorVcf = list(), fun.segmentation = segmentationCBS,
args.segmentation = list(), fun.focal = findFocal,
Expand Down Expand Up @@ -48,6 +48,8 @@ flag for dbSNP membership. The default fun.setPriorVcf function will also
look for a Cosmic.CNT slot, containing the hits in the COSMIC database.
Again, do not expect very useful results without a VCF file.}
\item{genome}{Genome version, required for the readVcf function.}
\item{centromeres}{Data.frame with centromere positions in first three columns.
If NULL, use pre-stored positions for genome versions hg18, hg19 and hg38. }
\item{sex}{Sex of sample. If ?, detect using getSexFromCoverage function and default
parameters.
Default parameters might not work well with every assay and
Expand Down

0 comments on commit f108863

Please sign in to comment.