Skip to content

Commit

Permalink
provided new versions of adjust_time
Browse files Browse the repository at this point in the history
  • Loading branch information
hechth committed Jul 23, 2024
1 parent 7889ed5 commit aa37e31
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 59 deletions.
116 changes: 89 additions & 27 deletions R/adjust.time.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,60 @@ compute_template_adjusted_rt <- function(combined, sel, j) {
return(all_features)
}

compute_corrected_features_v2 <- function(features, template_rt, delta_rt) {
features <- features |> dplyr::arrange_at(c("rt", "mz"))
idx <- dplyr::between(features$rt, min(template_rt), max(template_rt))
to_correct <- (features |> dplyr::filter(idx))$rt

this.smooth <- ksmooth(
template_rt,
delta_rt,
kernel = "normal",
bandwidth = (max(delta_rt) - min(delta_rt)) / 5,
x.points = to_correct
)

lower_bound_adjustment <- mean(this.smooth$y[this.smooth$x == min(this.smooth$x)])
upper_bound_adjustment <- mean(this.smooth$y[this.smooth$x == max(this.smooth$x)])

features <- features |>
dplyr::mutate(rt = dplyr::case_when(
rt < min(template_rt) ~ rt + lower_bound_adjustment,
rt > max(template_rt) ~ rt + upper_bound_adjustment
))
features[idx, "rt"] <- to_correct + this.smooth$y
return(features |> dplyr::arrange_at(c("mz", "rt")))
}

#' @export
compute_corrected_features <- function(features, delta_rt, avg_time) {
features <- features[order(features$rt, features$mz), ]
features <- features |> dplyr::arrange_at(c("rt", "mz"))

corrected <- features$rt
original <- features$rt
to_correct <- original[original >= min(delta_rt) &
original <= max(delta_rt)]

this.smooth <- ksmooth(delta_rt, avg_time,
idx <- dplyr::between(original, min(delta_rt), max(delta_rt))
to_correct <- original[idx]
this.smooth <- ksmooth(
delta_rt,
avg_time,
kernel = "normal",
bandwidth = (max(delta_rt) - min(delta_rt)) / 5,
x.points = to_correct
)

corrected[dplyr::between(original, min(delta_rt), max(delta_rt))] <-
this.smooth$y + to_correct
corrected[original < min(delta_rt)] <- corrected[original < min(delta_rt)] +
mean(this.smooth$y[this.smooth$x == min(this.smooth$x)])
corrected[original > max(delta_rt)] <- corrected[original > max(delta_rt)] +
mean(this.smooth$y[this.smooth$x == max(this.smooth$x)])
corrected[idx] <- this.smooth$y + to_correct
lower_bound_adjustment <- mean(this.smooth$y[this.smooth$x == min(this.smooth$x)])
upper_bound_adjustment <- mean(this.smooth$y[this.smooth$x == max(this.smooth$x)])

idx_lower <- original < min(delta_rt)
idx_upper <- original > max(delta_rt)

corrected[idx_lower] <- corrected[idx_lower] + lower_bound_adjustment
corrected[idx_upper] <- corrected[idx_upper] + upper_bound_adjustment
features$rt <- corrected
features <- features[order(features$mz, features$rt), ]

return(features)
}

Expand All @@ -73,18 +104,6 @@ fill_missing_values <- function(orig.feature, this.feature) {
return(this.feature)
}

#' @export
compute_template <- function(extracted_features) {
num.ftrs <- sapply(extracted_features, nrow)
template_id <- which.max(num.ftrs)
template <- extracted_features[[template_id]]$sample_id[1]
message(paste("the template is sample", template))

candi <- tibble::as_tibble(extracted_features[[template_id]]) |> dplyr::select(c(mz, rt, cluster))
template_features <- dplyr::bind_cols(candi, sample_id = rep(template, nrow(candi)))
return(tibble::as_tibble(template_features))
}

#' @export
correct_time <- function(this.feature, template_features) {
orig.features <- this.feature
Expand All @@ -99,11 +118,12 @@ correct_time <- function(this.feature, template_features) {
stop("too few, aborted")
} else {
all.ftr.table <- compute_template_adjusted_rt(this.comb, sel, j)
# the to be adjusted time
this.diff <- all.ftr.table[, 2]
# the difference between the true time and the to-be-adjusted time
avg_time <- all.ftr.table[, 1] - this.diff
this.feature <- compute_corrected_features(this.feature, this.diff, avg_time)

this.feature <- compute_corrected_features(
this.feature,
all.ftr.table[, 2], # the to be adjusted time
all.ftr.table[, 1] - all.ftr.table[, 2] # the difference between the true time and the to-be-adjusted time
)
}
}

Expand All @@ -117,6 +137,48 @@ correct_time <- function(this.feature, template_features) {
return(tibble::as_tibble(this.feature, column_name = c("mz", "rt", "sd1", "sd2", "area", "sample_id", "cluster")))
}

#' @export
compute_template <- function(extracted_features) {
num.ftrs <- sapply(extracted_features, nrow)
template_id <- which.max(num.ftrs)
template <- extracted_features[[template_id]]$sample_id[1]
message(paste("the template is sample", template))

candi <- tibble::as_tibble(extracted_features[[template_id]]) |> dplyr::select(c(mz, rt, cluster))
template_features <- dplyr::bind_cols(candi, sample_id = rep(template, nrow(candi)))
return(tibble::as_tibble(template_features))
}

correct_time_v2 <- function(features, template) {
if (unique(features$sample_id) == unique(template$sample_id))
return(tibble::as_tibble(features))

subsets <- template |>
dplyr::bind_rows(
features |> dplyr::select(c(mz, rt, cluster, sample_id))
) |>
dplyr::arrange_at(c("cluster", "mz")) |>
dplyr::group_by(cluster) |>
dplyr::mutate(count = dplyr::n_distinct(sample_id)) |>
filter(count == 2) |>
dplyr::add_count() |>
filter(n == 2) |>
dplyr::ungroup() |>
dplyr::group_by(sample_id) |>
dplyr::group_split()

all_features_new <- cbind(subsets[[1]]$rt, subsets[[2]]$rt)
all_features_new_order <- order(all_features_new[, 2])
all_features_new_arranged <- all_features_new[all_features_new_order,]

corrected <- compute_corrected_features_v2(
features,
all_features_new_arranged[, 2],
all_features_new_arranged[, 1] - all_features_new_arranged[, 2]
)
return(tibble::as_tibble(corrected))
}

#' Adjust retention time across spectra.
#'
#' This function adjusts the retention time in each LC/MS profile to achieve better between-profile agreement.
Expand Down
39 changes: 7 additions & 32 deletions tests/testthat/test-adjust-time.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,44 +42,19 @@ patrick::with_parameters_test_that(
)
)

test_that("compute_sel", {
testdata <- file.path("..", "testdata")
template_features <- file.path(testdata, "template", "RCX_shortened.parquet")
template_features <- arrow::read_parquet(template_features)
test_that("correct_time_v2 is close to correct time", {
template_features <- arrow::read_parquet(
file.path("..", "testdata", "template", "RCX_shortened.parquet")
)

clustered_table <- read_parquet_files(
c("RCX_06_shortened"),
"clusters",
"_extracted_clusters.parquet"
)[[1]]

all_features <- compute_template_adjusted_rt(
compute_comb(template_features, clustered_table),
compute_sel(combined),
"RCX_07_shortened"
)

subsets <- template_features |>
dplyr::bind_rows(clustered_table |> dplyr::select(c(mz, rt, cluster, sample_id))) |>
dplyr::arrange_at(c("cluster","mz")) |>
dplyr::group_by(cluster) |>
mutate(count = n_distinct(sample_id)) |>
filter(count == 2) |>
add_count() |>
filter(n == 2) |>
ungroup() |>
group_by(sample_id) |>
group_split()

all_features_new <- cbind(subsets[[1]]$rt, subsets[[2]]$rt)
all_features_new_order <- order(all_features_new[,2])
all_features_new_arranged <- all_features_new[all_features_new_order,]

this.feature <- compute_corrected_features(
clustered_table,
all_features_new_arranged[, 2],
all_features_new_arranged[, 1] - all_features_new_arranged[, 2]
)
expected <- correct_time(clustered_table, template_features)
actual <- correct_time_v2(clustered_table, template_features)

browser()
expect_equal(actual, expected, tolerance = 0.02)
})

0 comments on commit aa37e31

Please sign in to comment.