Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gdr 2643 #161

Merged
merged 15 commits into from
Aug 26, 2024
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Type: Package
Package: gDRcore
Title: Processing functions and interface to process and analyze drug
dose-response data
Version: 1.3.10
Date: 2024-08-19
Version: 1.3.11
Date: 2024-08-21
Authors@R: c(
person("Bartosz", "Czech", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0002-9908-3007")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export(map_ids_to_fits)
export(merge_data)
export(normalize_SE)
export(prepare_input)
export(process_perturbations)
export(remove_drug_batch)
export(replace_conc_with_standardized_conc)
export(runDrugResponseProcessingPipeline)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## gDRcore 1.3.11 - 2024-08-21
* identify additional perturbations hidden in the secondary drug

## gDRcore 1.3.10 - 2024-08-19
* utilize `calc_sd` function

Expand Down
126 changes: 111 additions & 15 deletions R/data_type.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,25 +44,30 @@ identify_data_type <- function(df,
codilution_conc = 2,
matrix_conc = 1) {

# find the pairs of drugs with relevant metadata
j-smola marked this conversation as resolved.
Show resolved Hide resolved
drug_ids <- unlist(gDRutils::get_env_identifiers(
c("drug_name", "drug_name2", "drug_name3"),
simplify = FALSE
))
drugs_ids <- drug_ids[which(drug_ids %in% names(df))]

conc_ids <- unlist(gDRutils::get_env_identifiers(
c("concentration", "concentration2", "concentration3"),
simplify = FALSE
))
conc_ids <- conc_ids[which(conc_ids %in% names(df))]
# Get drug and concentration identifiers
drug_ids <- get_relevant_ids(c("drug_name", "drug_name2", "drug_name3"), df)
conc_ids <- get_relevant_ids(c("concentration", "concentration2", "concentration3"), df)

drugs_cotrt_ids <- drugs_ids[!names(drugs_ids) %in% "drug_name"]
conc_cotrt_ids <- conc_ids[!names(conc_ids) %in% "concentration"]
# Filter out primary drug and concentration identifiers
drugs_cotrt_ids <- setdiff(drug_ids, "drug_name")
bczech marked this conversation as resolved.
Show resolved Hide resolved
conc_cotrt_ids <- setdiff(conc_ids, "concentration")

# Get untreated tag and cell line identifiers
untreated_tag <- c(gDRutils::get_env_identifiers("untreated_tag"), NA)
cell <- gDRutils::get_env_identifiers("cellline_name")
cols_pairs <- intersect(names(df), c(drug_ids, cell, conc_ids))

# Process perturbations
dt <- process_perturbations(df, drugs_cotrt_ids, conc_cotrt_ids, untreated_tag)

# Double-check the columns after processing perturbations
drug_ids <- get_relevant_ids(c("drug_name", "drug_name2", "drug_name3"), df)
bczech marked this conversation as resolved.
Show resolved Hide resolved
conc_ids <- get_relevant_ids(c("concentration", "concentration2", "concentration3"), df)

drugs_cotrt_ids <- setdiff(drug_ids, "drug_name")
conc_cotrt_ids <- setdiff(conc_ids, "concentration")


cols_pairs <- intersect(names(df), c(drug_ids, cell, conc_ids))
drug_pairs <- unique(df[, cols_pairs, with = FALSE])

df[, record_id := .I]
Expand Down Expand Up @@ -252,6 +257,18 @@ split_raw_data <- function(df,
}, df_list)
}

#' Get relevant identifiers from the environment
#'
#' Looks up the requested identifiers via `gDRutils::get_env_identifiers()`
#' and keeps only those whose values are column names of `df`.
#'
#' @param identifiers A character vector of identifier names to fetch from the environment
#' @param df A data frame containing the columns to be checked against the identifiers
#' @return A character vector of relevant identifiers that are present in the data frame
#' @keywords internal
get_relevant_ids <- function(identifiers, df) {
  env_ids <- unlist(gDRutils::get_env_identifiers(identifiers, simplify = FALSE))
  # Keep only the identifiers that exist as columns in `df`
  is_present <- env_ids %in% names(df)
  env_ids[is_present]
}


#' @keywords internal
#'
#' ensure that the pair of drugs are matching such that there are not data with
Expand Down Expand Up @@ -324,3 +341,82 @@ collapse_drugs <- function(df) {
}
df
}




#' Cleanup additional perturbations in the data.table
#'
#' This function processes drug and concentration columns in a data.table.
#' For each drug/concentration column pair it checks if there is only one
#' unique drug (excluding the untreated tag) and if there are exactly two
#' doses (one of which is 0). If these conditions are met, it creates a new
#' column named after the drug and fills it with the doses, then removes the
#' original drug and concentration columns, together with any related
#' identifier columns (drug, drug name, drug MOA, concentration) of the same
#' order that are present in the table.
#'
#' Columns are added and removed with `:=`, so `dt` is modified by reference.
#'
#' @param dt A data.table containing the data.
#' @param drugs_cotrt_ids A character vector of column names related to drugs.
#' @param conc_cotrt_ids A character vector of column names related to
#'   concentrations. Must have the same length as `drugs_cotrt_ids`; the i-th
#'   concentration column is paired with the i-th drug column.
#' @param untreated_tag A character vector of values representing the
#'   untreated tag (default is "vehicle").
#' @return A modified data.table with new columns for the drugs and removed
#'   original drug and concentration columns. If no pair meets the conditions
#'   (or no column names are given), `dt` is returned unchanged.
#' @examples
#' library(data.table)
#' dt <- data.table(
#'   drug1 = c("vehicle", "drugA", "drugA"),
#'   conc1 = c(0, 10, 0),
#'   drug2 = c("vehicle", "drugB", "drugB"),
#'   conc2 = c(0, 20, 0)
#' )
#' drugs_cotrt_ids <- c("drug1", "drug2")
#' conc_cotrt_ids <- c("conc1", "conc2")
#' dt <- process_perturbations(dt, drugs_cotrt_ids, conc_cotrt_ids)
#' print(dt)
#' @export
process_perturbations <- function(dt,
                                  drugs_cotrt_ids,
                                  conc_cotrt_ids,
                                  untreated_tag = "vehicle") {

  # Assertions
  checkmate::assert_data_table(dt)
  checkmate::assert_character(drugs_cotrt_ids, any.missing = FALSE)
  checkmate::assert_character(conc_cotrt_ids, any.missing = FALSE)
  checkmate::assert_true(length(drugs_cotrt_ids) == length(conc_cotrt_ids))

  # Iterate through each pair of columns in drugs_cotrt_ids and conc_cotrt_ids.
  # With zero-length inputs the loop body never runs and dt is returned
  # unchanged.
  for (i in seq_along(drugs_cotrt_ids)) {
    drug_col <- drugs_cotrt_ids[i]
    conc_col <- conc_cotrt_ids[i]

    # Only a column holding a single drug (excluding untreated_tag) qualifies
    unique_drugs <- unique(dt[[drug_col]])
    unique_drugs <- unique_drugs[!unique_drugs %in% untreated_tag]
    if (length(unique_drugs) != 1) {
      next
    }

    # Only a column with exactly two doses (0 and another value) qualifies
    unique_doses <- unique(dt[[conc_col]])
    if (length(unique_doses) != 2 || !0 %in% unique_doses) {
      next
    }

    # Create a new column named after the drug and fill it with the doses
    new_column_name <- as.character(unique_drugs[1])
    dt[, (new_column_name) := dt[[conc_col]]]

    # Numeric suffix of the drug column (e.g. "2" for "<name>_2"); empty
    # string when the column name has no "_<digits>" suffix
    drug_order <- gsub(".*_(\\d+)$|.*", "\\1", drug_col)
    drug_cols <-
      get_relevant_ids(paste0(
        c("drug", "drug_name", "drug_moa", "concentration"), drug_order),
        dt)

    # Remove the current drug and concentration columns plus the related
    # identifier columns. The lookup above may already return drug_col and
    # conc_col, so de-duplicate to remove each column only once.
    cols_to_remove <- unique(c(drug_cols, drug_col, conc_col))
    dt[, (cols_to_remove) := NULL]
  }

  dt
}
45 changes: 45 additions & 0 deletions man/process_perturbations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

70 changes: 69 additions & 1 deletion tests/testthat/test-data_type.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,72 @@ test_that("collapse drugs works as expected", {
expect_equal(dt_collapsed[[cols$drug]], "drug2")
expect_equal(dt_collapsed[[cols$drug2]], "drug3")
expect_equal(dt_collapsed[[cols$drug3]], "vehicle")
})
})


test_that("process_perturbations works as expected", {
  # Two pairs, each with a single drug and doses {0, x}: both pairs are
  # replaced by a dose column named after the drug.
  two_pairs <- data.table::data.table(
    drug1 = c("vehicle", "drugA", "drugA"),
    conc1 = c(0, 10, 0),
    drug2 = c("vehicle", "drugB", "drugB"),
    conc2 = c(0, 20, 0)
  )
  two_pairs_out <- process_perturbations(
    two_pairs,
    c("drug1", "drug2"),
    c("conc1", "conc2")
  )
  expect_equal(
    two_pairs_out,
    data.table::data.table(
      drugA = c(0, 10, 0),
      drugB = c(0, 20, 0)
    )
  )

  # Three qualifying pairs: all three are replaced.
  three_pairs <- data.table::data.table(
    drug1 = c("vehicle", "drugA", "drugA"),
    conc1 = c(0, 10, 0),
    drug2 = c("vehicle", "drugB", "drugB"),
    conc2 = c(0, 20, 0),
    drug3 = c("vehicle", "drugC", "drugC"),
    conc3 = c(0, 30, 0)
  )
  three_pairs_out <- process_perturbations(
    three_pairs,
    c("drug1", "drug2", "drug3"),
    c("conc1", "conc2", "conc3")
  )
  expect_equal(
    three_pairs_out,
    data.table::data.table(
      drugA = c(0, 10, 0),
      drugB = c(0, 20, 0),
      drugC = c(0, 30, 0)
    )
  )

  # First pair holds two different drugs, so it is kept as-is; the other
  # two pairs are still replaced.
  mixed_pairs <- data.table::data.table(
    drug1 = c("vehicle", "drugA", "drugB"),
    conc1 = c(0, 10, 2),
    drug2 = c("vehicle", "drugB", "drugB"),
    conc2 = c(0, 20, 0),
    drug3 = c("vehicle", "drugC", "drugC"),
    conc3 = c(0, 30, 0)
  )
  mixed_pairs_out <- process_perturbations(
    mixed_pairs,
    c("drug1", "drug2", "drug3"),
    c("conc1", "conc2", "conc3")
  )
  expect_equal(
    mixed_pairs_out,
    data.table::data.table(
      drug1 = c("vehicle", "drugA", "drugB"),
      conc1 = c(0, 10, 2),
      drugB = c(0, 20, 0),
      drugC = c(0, 30, 0)
    )
  )
})

Loading