Skip to content

Commit

Permalink
fixing Recal/processKnown function
Browse files Browse the repository at this point in the history
  • Loading branch information
KristinaGomoryova committed Sep 11, 2024
1 parent a866d78 commit 98078af
Show file tree
Hide file tree
Showing 7 changed files with 11,873 additions and 36 deletions.
59 changes: 23 additions & 36 deletions R/RecalFunctions.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
O_mass <- 15.9949146223
H2_mass <- 2.01565

#' Plot Spectrum Function
#'
#' This function generates a mass spectrum plot using ggplot2.
Expand Down Expand Up @@ -72,21 +75,27 @@ processMassList <- function(data) {
#' @param kmd_col The column name for KMD in both datasets.
#' @param z_col The column name for Z in both datasets.
#' @param num_col The column name to store the calculated number of compounds.
#' @param type The column name to store the type of compound.
#' @param type_col The column name to store the type of compound.
#' @param type A molecule type of the annotation.
#' @param multiplier The multiplier for the num column.
#' @param element_mass The mass of the group used for correction.
#' @param step_limit The limit for considering compounds.
#' @param remove_indices Indices of rows to be removed from the result.
#'
#' @return A processed dataset with updated abundance and type columns.
processKnown <- function(rest, known, kmd_col, z_col, num_col, type, step_limit, remove_indices) {
processKnown <- function(rest, known, kmd_col, z_col, num_col, type_col, type, multiplier, element_mass, step_limit, remove_indices) {
names(known)[2] <- "base_mass"

step_result <- merge(rest, known, by.x = c(kmd_col, z_col), by.y = c(kmd_col, z_col))
step_result[[num_col]] <- round((step_result$Exp_mass - step_result$base_mass) / step_limit)
step_result[[type]] <- step_result[[type]] + step_result[[num_col]]
step_result$Type <- type
step_result$form <- paste(step_result[c("C", "H", "O", "N", "S", "P", "E", "S34", "N15", "D", "Cl", "Fl", "Cl37", "M", "NH4", "POE", "NOE")], sep = "_")
step_result[[num_col]] <- round((step_result$Exp_mass - step_result$base_mass) / element_mass)
step_result[[type_col]] <- step_result[[type_col]] + step_result[[num_col]]*multiplier
step_result$Type <- type
step_result <- dplyr::mutate(
step_result,
form=paste(C, H, O, N, S, P, E, S34, N15, D, Cl, Fl, Cl37, M, NH4, POE, NOE, sep="_")
)
step_result <- step_result[abs(step_result[[num_col]]) <= step_limit, ]
step_result <- step_result[-remove_indices, ]
step_result <- step_result[-remove_indices]
return(step_result)
}

Expand Down Expand Up @@ -217,10 +226,10 @@ Recal <- function(df,
############
# Picking recalibrants with series
# Process known_O
Step2 <- processKnown(Rest, RecalList[c(1:21, 24, 25)], "KMD_O", "z_O", "O_num", "O", step_O, c(10, 31))
Step2 <- processKnown(Rest, RecalList[c(1:21, 24, 25)], "KMD_O", "z_O", "O_num", "O", "O", 1, O_mass, step_O, c(10, 31))

# Process known_H2
Step3 <- processKnown(Rest, RecalList[c(1:21, 27, 28)], "KMD_H2", "z_H2", "H2_num", "H2", step_H2, c(10, 31))
Step3 <- processKnown(Rest, RecalList[c(1:21, 27, 28)], "KMD_H2", "z_H2", "H2_num", "H", "H2", 2, H2_mass, step_H2, c(10, 31))

Out <- rbind(Step2, Step3)
Out2 <- dplyr::distinct(Out, Exp_mass)
Expand Down Expand Up @@ -287,33 +296,11 @@ Recal <- function(df,
############
# Picking recalibrants with series

knownO <- RecalList[c(1:22, 25, 26)]
names(knownO)[2] <- "base_mass"
Step2 <- merge(Rest, knownO, by.x = c("KMD_O", "z_O"), by.y = c("KMD_O", "z_O"))
Step2$O_num <- round(((Step2$Exp_mass - Step2$base_mass)) / 15.9949146223)
Step2$O <- Step2$O + Step2$O_num
Step2$Type <- "O"
Step2$form <- paste(Step2$C, Step2$H, Step2$O, Step2$N, Step2$S, Step2$P, Step2$E, Step2$S34,
Step2$N15, Step2$D, Step2$Cl, Step2$Fl, Step2$Cl37, Step2$M, Step2$NH4,
Step2$POE, Step2$NOE,
sep = "_"
)
Step2 <- Step2[abs(Step2$O_num) <= step_O, ]
Step2 <- Step2[-c(11, 13, 33)] #

knownH2 <- RecalList[c(1:22, 28, 29)]
names(knownH2)[2] <- "base_mass"
Step3 <- merge(Rest, knownH2, by.x = c("KMD_H2", "z_H2"), by.y = c("KMD_H2", "z_H2"))
Step3$H2_num <- round(((Step3$Exp_mass - Step3$base_mass)) / 2.01565)
Step3$H <- Step3$H + 2 * Step3$H2_num
Step3$Type <- "H2"
Step3$form <- paste(Step3$C, Step3$H, Step3$O, Step3$N, Step3$S, Step3$P, Step3$E, Step3$S34,
Step3$N15, Step3$D, Step3$Cl, Step3$Fl, Step3$Cl37, Step3$M, Step3$NH4,
Step3$POE, Step3$NOE,
sep = "_"
)
Step3 <- Step3[abs(Step3$H2_num) <= step_H2, ]
Step3 <- Step3[-c(11, 13, 33)]
# Process known O16
Step2 <- processKnown(Rest, RecalList[c(1:22, 25, 26)], "KMD_O", "z_O", "O_num", "O", "O", 1, O_mass, step_O, c(11, 13, 33))

# Process known_H2
Step3 <- processKnown(Rest, RecalList[c(1:22, 28, 29)], "KMD_H2", "z_H2", "H2_num", "H", "H2", 2, H2_mass, step_H2, c(11, 13, 33))

Out <- rbind(Step2, Step3)
Out2 <- dplyr::distinct(Out, Exp_mass, .keep_all = TRUE)
Expand Down
26 changes: 26 additions & 0 deletions tests/testthat/test-RecalFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,29 @@ patrick::with_parameters_test_that("RecalList works",
},
mode = c("neg", "pos"),
)

test_that("Replicate the Recal bug", {
unambig <- read.delim("test-data/bug_unambig.tabular")
mono <- read.delim("test-data/bug_mono.tabular")
iso <- read.delim("test-data/bug_iso.tabular")
recallist <- read.delim("test-data/bug_recalseries.tabular")

actual <- MFAssignR::Recal(
df = unambig,
peaks = mono,
isopeaks = iso,
mode = "neg",
SN = 6*346.0706,
series1 = recallist$Series[1],
series2 = recallist$Series[2],
series3 = recallist$Series[3],
series4 = recallist$Series[4],
series5 = recallist$Series[5],
step_O = 3,
step_H2 = 5,
mzRange = 50,
CalPeak = 150
)
expected <- readRDS("test-data/bug_recal_expected.rds")
expect_equal(actual, expected)
})
Loading

0 comments on commit 98078af

Please sign in to comment.