diff --git a/R/remove_noise.R b/R/remove_noise.R
index d0e96a2..e4ebc74 100644
--- a/R/remove_noise.R
+++ b/R/remove_noise.R
@@ -60,6 +60,7 @@ load_data <- function(filename,
 #' @param intensity_weighted Whether to use intensity to weight mass density estimation.
 #' @param do.plot Indicates whether plot should be drawn.
 #' @param cache Whether to use cache
+#' @param grouping_threshold The maximum difference between two scans to be considered the same EIC. Default is Inf.
 #' @return A matrix with four columns: m/z value, retention time, intensity, and group number.
 #' @export
 remove_noise <- function(filename,
@@ -71,7 +72,7 @@ remove_noise <- function(filename,
                          intensity_weighted,
                          do.plot,
                          cache,
-                         grouping_threshold = 0) {
+                         grouping_threshold = Inf) {
   raw.data <- load_file(filename)
 
   raw.prof <- adaptive.bin(
@@ -93,7 +94,7 @@ remove_noise <- function(filename,
 
   newprof <- newprof[newprof[, 4] %in% run.sel, ]
 
-  if (grouping_threshold > 0) {
+  if (grouping_threshold < Inf) {
     sorted_newprof <- newprof[order(newprof[,2]),]
     new_grps <- cumsum(c(0, diff(sorted_newprof[,2])) > grouping_threshold)
     sorted_newprof <- cbind(sorted_newprof, new_grps, deparse.level = 0)
diff --git a/conda/environment-dev.yaml b/conda/environment-dev.yaml
index ba6f375..521e95e 100644
--- a/conda/environment-dev.yaml
+++ b/conda/environment-dev.yaml
@@ -8,7 +8,7 @@ dependencies:
   - icu <=70.1
   - r-mass
   - r-rgl
-  - bioconductor-mzR ==2.28.0
+  - bioconductor-mzR ==2.36.0
   - r-splines2
   - r-doparallel
   - r-foreach
diff --git a/tests/remote-files/input.txt b/tests/remote-files/input.txt
index 4afb4bf..80f98a8 100644
--- a/tests/remote-files/input.txt
+++ b/tests/remote-files/input.txt
@@ -6,4 +6,5 @@ https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/
 https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/RCX_08_shortened.mzML
 https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/single_eic.mzml
 https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/alg3.mzdata
-https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/test_file.mzXML
\ No newline at end of file
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/test_file.mzXML
+https://gitlab.ics.muni.cz/umsa/umsa-files/-/raw/master/testdata/recetox-aplcms/input/Tribrid_201106_009-QC1_1_NEG_FISABIO_single_eic.raw.mzML
\ No newline at end of file
diff --git a/tests/testthat/test-remove_noise.R b/tests/testthat/test-remove_noise.R
index 2691fec..7464fbc 100644
--- a/tests/testthat/test-remove_noise.R
+++ b/tests/testthat/test-remove_noise.R
@@ -74,3 +74,33 @@ patrick::with_parameters_test_that(
     )
   )
 )
+
+test_that("remove noise works with grouping threshold", {
+  testdata <- file.path("..", "testdata")
+  input_path <- file.path(testdata,
+                          "input",
+                          "Tribrid_201106_009-QC1_1_NEG_FISABIO_single_eic.raw.mzML")
+
+  expected <- tibble(group_number = c(1, 2, 3, 5, 6, 7, 8, 9),
+                     n = c(67, 73, 3, 39, 2, 6, 3, 7))
+
+  sut <- remove_noise(
+    input_path,
+    min_pres = 0.8,
+    min_run = 0.2,
+    mz_tol = 5e-05,
+    baseline_correct = 0.0,
+    baseline_correct_noise_percentile = 0.05,
+    intensity_weighted = FALSE,
+    do.plot = FALSE,
+    cache = FALSE,
+    grouping_threshold = 4
+  )
+
+  actual <- sut %>%
+    mutate(group = factor(group_number)) %>%
+    group_by(group_number) %>%
+    summarize(n = n())
+
+  expect_equal(actual, expected)
+})
\ No newline at end of file