Skip to content

Commit

Permalink
fix: peak shape quality calculation for gap filling
Browse files Browse the repository at this point in the history
- Peak shape quality (similarity to gaussian shape) calculation is now performed
  using the EIC representation of a chromatographic peak, i.e. with intensities
  of mass peaks for the same retention time (but different m/z) summed.
  • Loading branch information
jorainer committed Sep 18, 2024
1 parent 2c57df3 commit dacd665
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 51 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
- Address issue #765: peak detection on chromatographic data: report a
chromatogram's `"mz"`, `"mzmin"` and `"mzmax"` as the mean m/z and lower and
upper m/z in the `chromPeaks()` matrix.
- Fix calculation of the correlation coefficient for peak shape similarity with
an idealized bell shape (*beta*) during gap filling for centWave-based
chromatographic peak detection with parameter `verboseBetaColumns = TRUE`.

## Changes in version 4.3.3

Expand Down
9 changes: 6 additions & 3 deletions R/XcmsExperiment-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,9 @@
## consider adding 0 or NA intensity for those.
mat <- do.call(rbind, xsub)
if (nrow(mat)) {
nr <- vapply(xsub, nrow, NA_integer_)
## can have 0, 1 or x values per rt; repeat rt accordingly
rts <- rep(rt[keep], vapply(xsub, nrow, integer(1L)))
rts <- rep(rt[keep], nr)
maxi <- which.max(mat[, 2L])[1L]
mmz <- do.call(mzCenterFun, list(mat[, 1L], mat[, 2L]))
if (is.na(mmz)) mmz <- mat[maxi, 1L]
Expand All @@ -530,9 +531,11 @@
sum(mat[, 2L], na.rm = TRUE) *
((rtr[2L] - rtr[1L]) / max(1L, (length(keep) - 1L)))
)
if ("beta_cor" %in% cn)
if ("beta_cor" %in% cn) {
res[i, c("beta_cor", "beta_snr")] <- .get_beta_values(
mat[, 2L], rts)
vapply(xsub[nr > 0], sum, NA_real_),
rt[keep][nr > 0])
}
}
}
}
Expand Down
29 changes: 18 additions & 11 deletions R/do_findChromPeaks-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -3720,14 +3720,20 @@ peaksWithCentWave <- function(int, rt,
#' requires at least 5 scans or it will return NA for both parameters.
#'
#' @param intensity A numeric vector corresponding to the peak intensities
#'
#' @param rtime A numeric vector corresponding to the retention times of each
#' intensity. If not provided, intensities will be assumed to be equally spaced.
#' intensity. Retention times are expected to be in increasing order
#' without duplicates. If not provided, intensities will be assumed to be
#' equally spaced.
#'
#' @param skews A numeric vector of the skews to try, corresponding to the
#' shape1 of dbeta with a shape2 of 5. Values less than 5 will be increasingly
#' right-skewed, while values greater than 5 will be left-skewed.
#' shape1 of dbeta with a shape2 of 5. Values less than 5 will be
#' increasingly right-skewed, while values greater than 5 will be
#' left-skewed.
#'
#' @param zero.rm Boolean value controlling whether "missing" scans are dropped
#' prior to curve fitting. The default, TRUE, will remove intensities of zero
#' or NA
#' prior to curve fitting. The default, TRUE, will remove intensities of
#' zero or NA
#'
#' @author William Kumler
#'
Expand All @@ -3740,21 +3746,22 @@ peaksWithCentWave <- function(int, rt,
rtime <- rtime[keep]
intensity <- intensity[keep]
}
if(length(intensity)<5){
if (length(intensity) < 5) {
best_cor <- NA
beta_snr <- NA
} else {
beta_sequence <- rep(.scale_zero_one(rtime), each=length(skews))
beta_sequence <- rep(.scale_zero_one(rtime), each = length(skews))
beta_vals <- t(matrix(dbeta(beta_sequence, shape1 = skews, shape2 = 5),
nrow = length(skews)))
# matplot(beta_vals)
beta_cors <- cor(intensity, beta_vals)
best_cor <- max(beta_cors)
best_curve <- beta_vals[, which.max(beta_cors)]
noise_level <- sd(diff(.scale_zero_one(best_curve)-.scale_zero_one(intensity)))
beta_snr <- log10(max(intensity)/noise_level)
noise_level <- sd(diff(.scale_zero_one(best_curve) -
.scale_zero_one(intensity)))
beta_snr <- log10(max(intensity) / noise_level)
}
c(best_cor=best_cor, beta_snr=beta_snr)
c(best_cor = best_cor, beta_snr = beta_snr)
}


Expand All @@ -3769,5 +3776,5 @@ peaksWithCentWave <- function(int, rt,
#'
#' @noRd
.scale_zero_one <- function(num_vec){
(num_vec-min(num_vec))/(max(num_vec)-min(num_vec))
(num_vec-min(num_vec)) / (max(num_vec) - min(num_vec))
}
12 changes: 6 additions & 6 deletions R/functions-XCMSnExp.R
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ dropGenericProcessHistory <- function(x, fun) {
valsPerSpect = valsPerSpect, rtrange = rtr,
mzrange = mzr)
if (length(mtx)) {
## mtx: time, mz, intensity
if (any(!is.na(mtx[, 3]))) {
## How to calculate the area: (1)sum of all intensities / (2)by
## the number of data points (REAL ones, considering also NAs)
Expand All @@ -290,21 +291,20 @@ dropGenericProcessHistory <- function(x, fun) {
## as e.g. centWave. Using max(1, ... to avoid getting Inf in
## case the signal is based on a single data point.
## (3) rtr[2] - rtr[1]
res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) *
res[i, "into"] <- sum(mtx[, 3L], na.rm = TRUE) *
((rtr[2] - rtr[1]) /
max(1, (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)))
maxi <- which.max(mtx[, 3])
maxi <- which.max(mtx[, 3L])
res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)]
res[i, c("rtmin", "rtmax")] <- rtr
## Calculate the intensity weighted mean mz
meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3]))
if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2]
res[i, "mz"] <- meanMz

if ("beta_cor" %in% cn) {
if ("beta_cor" %in% cn)
res[i, c("beta_cor", "beta_snr")] <- .get_beta_values(
mtx[, 3L], mtx[, 1L])
}
vapply(split(mtx[, 3L], mtx[, 1L]), sum, NA_real_),
unique(mtx[, 1L]))
} else {
res[i, ] <- rep(NA_real_, ncols)
}
Expand Down
77 changes: 46 additions & 31 deletions tests/testthat/test_do_findChromPeaks-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,29 +46,29 @@ test_that("beta calculation returns expected values", {
expect_lt(.get_beta_values(1:10, zero.rm = FALSE)["beta_snr"], 2)
expect_lt(.get_beta_values(1:10)["best_cor"], 0.0001)
expect_lt(.get_beta_values(1:10)["beta_snr"], 2)

ideal_beta <- dbeta(seq(0, 1, length.out=10), 5, 5)
expect_gte(.get_beta_values(ideal_beta, zero.rm = FALSE)["best_cor"], 1)
expect_gte(.get_beta_values(ideal_beta, zero.rm = FALSE)["beta_snr"], 16)
expect_gte(.get_beta_values(ideal_beta)["best_cor"], 0.97)
expect_gte(.get_beta_values(ideal_beta)["beta_snr"], 1)

skew_beta <- dbeta(seq(0, 1, length.out=10), 3, 5)
expect_gte(.get_beta_values(ideal_beta, zero.rm = FALSE)["best_cor"], 1)
expect_gte(.get_beta_values(ideal_beta, zero.rm = FALSE)["beta_snr"], 16)
expect_gte(.get_beta_values(ideal_beta)["best_cor"], 0.97)
expect_gte(.get_beta_values(ideal_beta)["beta_snr"], 1)

rightskew_beta <- dbeta(seq(0, 1, length.out=10), 7, 5)
expect_gt(.get_beta_values(rightskew_beta, skews = c(3,5,7))["best_cor"], 0.95)

noise_beta <- dbeta(seq(0, 1, length.out=21), 5, 5)*10+runif(21)
expect_gt(.get_beta_values(noise_beta)["best_cor"], 0.9)

expect_no_error(.get_beta_values(runif(1)))
expect_no_error(.get_beta_values(runif(10)))
expect_no_error(.get_beta_values(runif(100)))

expect_length(.get_beta_values(1), 2)
expect_true(is.na(.get_beta_values(1)["best_cor"]))
expect_true(is.na(.get_beta_values(1)["beta_snr"]))
Expand All @@ -78,31 +78,31 @@ test_that("New beta columns perform as expected", {
# faahko_xod comes from testthat.R
# faahko_xod <- findChromPeaks(
# faahko_od, param = CentWaveParam(noise = 10000, snthresh = 40,
# prefilter = c(3, 10000)))
# prefilter = c(3, 10000)))
# Same params as before but with verboseBetaColumns = TRUE
beta_cwp <- CentWaveParam(noise = 10000, snthresh = 40,
prefilter = c(3, 10000),
beta_cwp <- CentWaveParam(noise = 10000, snthresh = 40,
prefilter = c(3, 10000),
verboseBetaColumns = TRUE)
faahko_xod_beta <- findChromPeaks(faahko_od, beta_cwp)

# Unit test - check that the new object contains expected columns
expect_contains(colnames(chromPeaks(faahko_xod_beta)),
expect_contains(colnames(chromPeaks(faahko_xod_beta)),
c("beta_cor", "beta_snr"))

# Unit test - check that everything else in the object is the same
orig_chrompeaks <- chromPeaks(faahko_xod)
beta_chrompeaks <- chromPeaks(faahko_xod_beta)
expect_identical(orig_chrompeaks, beta_chrompeaks[,colnames(orig_chrompeaks)])

# Object will contain NAs because there are peaks <5 scans wide
expect_true(any(is.na(beta_chrompeaks[,"beta_snr"])))
beta_chrompeaks <- beta_chrompeaks[!is.na(beta_chrompeaks[,"beta_cor"]),]

# Unit test - check that beta values make sense
expect_true(all(beta_chrompeaks[,"beta_cor"]<=1))
expect_true(all(beta_chrompeaks[,"beta_cor"]>=-1))
expect_true(all(beta_chrompeaks[,"beta_snr"]>=0))

# Unit test - finds a single good peak (beta_cor>0.8, beta_snr>7)
# Skinny peak copied from below peaksWithCentWave tests
skinny_peak <- c(9107, 3326, 9523, 3245, 3429, 9394, 1123, 935, 5128, 8576,
Expand All @@ -113,33 +113,33 @@ test_that("New beta columns perform as expected", {
8283, 3410, 5935, 3332, 7041, 3284, 7478, 76, 3739, 2158, 5507)
skinny_peak_rt <- seq_along(skinny_peak)+100
cw_output_beta <- .centWave_orig(int = skinny_peak, scantime = skinny_peak_rt,
mz=sort(rnorm(60)/1000+100),
mz=sort(rnorm(60)/1000+100),
valsPerSpect = rep(1, length(skinny_peak)),
peakwidth = c(20, 80), extendLengthMSW = TRUE,
peakwidth = c(20, 80), extendLengthMSW = TRUE,
verboseBetaColumns = TRUE, snthresh = 0)
expect_equal(nrow(cw_output_beta), 1)
# Known values to ensure performance doesn't degrade unexpectedly
expect_gt(cw_output_beta[,"beta_cor"], 0.8)
expect_gt(cw_output_beta[,"beta_snr"], 7)

# Unit test - finds a single noise peak (beta_cor < 0.5, beta_snr < 6)
# set.seed(123)
# noise_peak <- round(runif(100), 3)
noise_peak <- c(0.288, 0.788, 0.409, 0.883, 0.94, 0.046, 0.528, 0.892, 0.551,
0.457, 0.957, 0.453, 0.678, 0.573, 0.103, 0.9, 0.246, 0.042,
0.328, 0.955, 0.89, 0.693, 0.641, 0.994, 0.656, 0.709, 0.544,
0.594, 0.289, 0.147, 0.963, 0.902, 0.691, 0.795, 0.025, 0.478,
0.758, 0.216, 0.318, 0.232, 0.143, 0.415, 0.414, 0.369, 0.152,
0.139, 0.233, 0.466, 0.266, 0.858, 0.046, 0.442, 0.799, 0.122,
0.561, 0.207, 0.128, 0.753, 0.895, 0.374, 0.665, 0.095, 0.384,
0.274, 0.815, 0.449, 0.81, 0.812, 0.794, 0.44, 0.754, 0.629,
0.71, 0.001, 0.475, 0.22, 0.38, 0.613, 0.352, 0.111, 0.244,
0.668,
0.418, 0.788, 0.103, 0.435, 0.985, 0.893, 0.886, 0.175, 0.131,
noise_peak <- c(0.288, 0.788, 0.409, 0.883, 0.94, 0.046, 0.528, 0.892, 0.551,
0.457, 0.957, 0.453, 0.678, 0.573, 0.103, 0.9, 0.246, 0.042,
0.328, 0.955, 0.89, 0.693, 0.641, 0.994, 0.656, 0.709, 0.544,
0.594, 0.289, 0.147, 0.963, 0.902, 0.691, 0.795, 0.025, 0.478,
0.758, 0.216, 0.318, 0.232, 0.143, 0.415, 0.414, 0.369, 0.152,
0.139, 0.233, 0.466, 0.266, 0.858, 0.046, 0.442, 0.799, 0.122,
0.561, 0.207, 0.128, 0.753, 0.895, 0.374, 0.665, 0.095, 0.384,
0.274, 0.815, 0.449, 0.81, 0.812, 0.794, 0.44, 0.754, 0.629,
0.71, 0.001, 0.475, 0.22, 0.38, 0.613, 0.352, 0.111, 0.244,
0.668,
0.418, 0.788, 0.103, 0.435, 0.985, 0.893, 0.886, 0.175, 0.131,
0.653, 0.344, 0.657, 0.32, 0.188, 0.782, 0.094, 0.467, 0.512)
cw_output_beta <- .centWave_orig(int = noise_peak*100000,
cw_output_beta <- .centWave_orig(int = noise_peak*100000,
scantime = seq_along(noise_peak),
mz=rep(530.1, length(noise_peak)),
mz=rep(530.1, length(noise_peak)),
valsPerSpect = rep(1, length(noise_peak)),
peakwidth = c(20, 80), extendLengthMSW = TRUE,
verboseBetaColumns = TRUE, snthresh = 0)
Expand All @@ -149,6 +149,21 @@ test_that("New beta columns perform as expected", {
expect_lt(cw_output_beta[,"beta_snr"], 6)
})

test_that(".get_beta_values works with chromatographic and MS data", {
vals <- c(2, 3, 4, 6, 8, 10, 7, 6, 4, 3, 2)
res <- .get_beta_values(vals)
## intensity values with duplicated retention times
vals <- c(2, 3, 4, 6, 2, 8, 10, 7, 6, 4, 3, 2)
rts <- c(1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10)
res_a <- .get_beta_values(vals)
expect_true(res_a[1L] < res[1L])
## WARNING: does not work if we have duplicated retentio times!
expect_warning(
res_b <- .get_beta_values(vals, rts)
)
expect_true(is.na(res_b[1L]))
})

test_that("do_findChromPeaks_centWaveWithPredIsoROIs works", {
skip_on_os(os = "windows", arch = "i386")

Expand Down

0 comments on commit dacd665

Please sign in to comment.