forked from lima1/PureCN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filterIntervals.Rd
97 lines (83 loc) · 3.47 KB
/
filterIntervals.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/filterIntervals.R
\name{filterIntervals}
\alias{filterIntervals}
\title{Remove low quality intervals}
\usage{
filterIntervals(
normal,
tumor,
log.ratio,
seg.file,
filter.lowhigh.gc = 0.001,
min.coverage = 15,
min.total.counts = 100,
min.targeted.base = 5,
min.mappability = c(0.6, 0.1),
min.fraction.offtarget = 0.05,
normalDB = NULL
)
}
\arguments{
\item{normal}{Coverage data for normal sample.}
\item{tumor}{Coverage data for tumor sample.}
\item{log.ratio}{Copy number log-ratios, one for each interval in the
coverage file.}
\item{seg.file}{If not \code{NULL}, then do not filter intervals, because data
is already segmented via the provided segmentation file.}
\item{filter.lowhigh.gc}{Quantile q (defines lower q and upper 1-q) for
removing intervals with outlier GC profile, under the assumption that GC
correction might not have worked on those. Requires \code{interval.file}.}
\item{min.coverage}{Minimum coverage in both normal and tumor. Intervals with
lower coverage are ignored. If a \code{normalDB} is provided, then this
database already provides information about low quality intervals and the
\code{min.coverage} is set to \code{min.coverage/10000}.}
\item{min.total.counts}{Exclude intervals with fewer than that many reads
in combined tumor and normal.}
\item{min.targeted.base}{Exclude intervals with targeted base (size in bp)
smaller than this cutoff. This is useful when the same interval file was
used to calculate GC content. For such small targets, the GC content is
likely very different from the true GC content of the probes.}
\item{min.mappability}{\code{double(2)} specifying the minimum mappability score
for on-target and off-target intervals, in that order.}
\item{min.fraction.offtarget}{Skip off-target regions when less than the
specified fraction of all intervals passes all filters.}
\item{normalDB}{Normal database, created with
\code{\link{createNormalDatabase}}.}
}
\value{
\code{logical(length(log.ratio))} specifying which intervals should be
used in segmentation.
}
\description{
This function determines which intervals in the coverage files should be
included or excluded in the segmentation. It is called via the
\code{fun.filterIntervals} argument of \code{\link{runAbsoluteCN}}. The
arguments are passed via \code{args.filterIntervals}.
}
\examples{
normal.coverage.file <- system.file("extdata", "example_normal.txt.gz",
package = "PureCN")
normal2.coverage.file <- system.file("extdata", "example_normal2.txt.gz",
package = "PureCN")
normal.coverage.files <- c(normal.coverage.file, normal2.coverage.file)
normalDB <- createNormalDatabase(normal.coverage.files)
tumor.coverage.file <- system.file("extdata", "example_tumor.txt.gz",
package = "PureCN")
vcf.file <- system.file("extdata", "example.vcf.gz",
package = "PureCN")
interval.file <- system.file("extdata", "example_intervals.txt",
package = "PureCN")
# The max.candidate.solutions, max.ploidy and test.purity parameters are set to
# non-default values to speed up this example. This is not a good idea for real
# samples.
ret <-runAbsoluteCN(normal.coverage.file = normal.coverage.file,
tumor.coverage.file = tumor.coverage.file,
genome = "hg19", vcf.file = vcf.file, normalDB = normalDB,
sampleid = "Sample1", interval.file = interval.file,
args.filterIntervals = list(min.targeted.base = 10), max.ploidy = 4,
test.purity = seq(0.3, 0.7, by = 0.05), max.candidate.solutions = 1)
}
\author{
Markus Riester
}