gdrplatform · bczech · Aug 26, 2024 · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024
@@ -2,8 +2,8 @@ Type: Package
 Package: gDRcore
 Title: Processing functions and interface to process and analyze drug
     dose-response data
-Version: 1.3.10
-Date: 2024-08-19
+Version: 1.3.11
+Date: 2024-08-21
 Authors@R: c(
     person("Bartosz", "Czech", , "[email protected]", role = "aut",
            comment = c(ORCID = "0000-0002-9908-3007")),

@@ -37,6 +37,7 @@ export(map_ids_to_fits)
 export(merge_data)
 export(normalize_SE)
 export(prepare_input)
+export(process_perturbations)
 export(remove_drug_batch)
 export(replace_conc_with_standardized_conc)
 export(runDrugResponseProcessingPipeline)

@@ -1,3 +1,6 @@
+## gDRcore 1.3.11 - 2024-08-21
+* identify additional perturbations hidden in the secondary drug
+
 ## gDRcore 1.3.10 - 2024-08-19
 * utilize `calc_sd` function
 

@@ -44,25 +44,30 @@ identify_data_type <- function(df,
                                codilution_conc = 2,
                                matrix_conc = 1) {
 
-  # find the pairs of drugs with relevant metadata
-  drug_ids <- unlist(gDRutils::get_env_identifiers(
-    c("drug_name", "drug_name2", "drug_name3"), 
-    simplify = FALSE
-  )) 
-  drugs_ids <- drug_ids[which(drug_ids %in% names(df))]
-
-  conc_ids <- unlist(gDRutils::get_env_identifiers(
-    c("concentration", "concentration2", "concentration3"), 
-    simplify = FALSE
-  ))
-  conc_ids <- conc_ids[which(conc_ids %in% names(df))]
+  # Get drug and concentration identifiers
+  drug_ids <- get_relevant_ids(c("drug_name", "drug_name2", "drug_name3"), df)
+  conc_ids <- get_relevant_ids(c("concentration", "concentration2", "concentration3"), df)
 
-  drugs_cotrt_ids <- drugs_ids[!names(drugs_ids) %in% "drug_name"]
-  conc_cotrt_ids <- conc_ids[!names(conc_ids) %in% "concentration"]
+  # Filter out primary drug and concentration identifiers.
+  # The vectors are named by identifier ("drug_name", "concentration", ...)
+  # while their values are the matching column names of `df`, so the primary
+  # entries have to be dropped by name; setdiff() would compare the values.
+  drugs_cotrt_ids <- drug_ids[!names(drug_ids) %in% "drug_name"]
+  conc_cotrt_ids <- conc_ids[!names(conc_ids) %in% "concentration"]
 
+  # Get untreated tag and cell line identifiers
   untreated_tag <- c(gDRutils::get_env_identifiers("untreated_tag"), NA)
   cell <- gDRutils::get_env_identifiers("cellline_name")
-  cols_pairs <- intersect(names(df),  c(drug_ids, cell, conc_ids))
+
+  # Process perturbations. Keep the returned table in `df`: the code below
+  # works on `df`, and relying only on by-reference modification is fragile.
+  df <- process_perturbations(df, drugs_cotrt_ids, conc_cotrt_ids, untreated_tag)
+
+  # process_perturbations() may have removed co-treatment columns, so resolve
+  # the identifiers again against the current columns of `df`
+  drug_ids <- get_relevant_ids(c("drug_name", "drug_name2", "drug_name3"), df)
+  conc_ids <- get_relevant_ids(c("concentration", "concentration2", "concentration3"), df)
+
+  drugs_cotrt_ids <- drug_ids[!names(drug_ids) %in% "drug_name"]
+  conc_cotrt_ids <- conc_ids[!names(conc_ids) %in% "concentration"]
+
+
+  cols_pairs <- intersect(names(df), c(drug_ids, cell, conc_ids))
   drug_pairs <- unique(df[, cols_pairs, with = FALSE])
 
   df[, record_id := .I]
@@ -252,6 +257,18 @@ split_raw_data <- function(df,
   }, df_list)
 }
 
+#' Get relevant identifiers from the environment
+#'
+#' Resolves the requested identifiers with `gDRutils::get_env_identifiers()`
+#' and keeps only those whose resolved value is a column name of `df`.
+#'
+#' @param identifiers A character vector of identifier names to fetch from the environment
+#' @param df A data frame containing the columns to be checked against the identifiers
+#' @return A character vector of relevant identifiers that are present in the data frame
+#' (the result of `unlist()`, so presumably named by identifier).
+#' @keywords internal
+get_relevant_ids <- function(identifiers, df) {
+  ids <- unlist(gDRutils::get_env_identifiers(identifiers, simplify = FALSE))
+  ids[ids %in% names(df)]
+}
+
+
 #' @keywords internal
 #'
 #' ensure that the pair of drugs are matching such that there are not data with
@@ -324,3 +341,82 @@ collapse_drugs <- function(df) {
   }
   df  
 }
+
+
+
+
+#' Cleanup additional perturbations in the data.table
+#'
+#' This function processes drug and concentration columns in a data.table.
+#' It checks if there is only one unique drug (excluding the untreated tags)
+#' and if there are exactly two doses (one of which is 0). If these conditions are met,
+#' it creates a new column named after the drug and fills it with the doses,
+#' then removes the original drug and concentration columns.
+#'
+#' Columns are added and removed with `:=`, so `dt` is modified by reference.
+#'
+#' @param dt A data.table containing the data.
+#' @param drugs_cotrt_ids A vector of column names related to drugs.
+#' @param conc_cotrt_ids A vector of column names related to concentrations
+#' (same length and order as `drugs_cotrt_ids`).
+#' @param untreated_tag A vector of values marking untreated records in the
+#' drug columns (default is "vehicle").
+#' @return A modified data.table with new columns for the drugs and removed original drug and concentration columns.
+#' @examples
+#' library(data.table)
+#' dt <- data.table(
+#'   drug1 = c("vehicle", "drugA", "drugA"),
+#'   conc1 = c(0, 10, 0),
+#'   drug2 = c("vehicle", "drugB", "drugB"),
+#'   conc2 = c(0, 20, 0)
+#' )
+#' drugs_cotrt_ids <- c("drug1", "drug2")
+#' conc_cotrt_ids <- c("conc1", "conc2")
+#' dt <- process_perturbations(dt, drugs_cotrt_ids, conc_cotrt_ids)
+#' print(dt)
+#' @export
+process_perturbations <- function(dt,
+                                  drugs_cotrt_ids,
+                                  conc_cotrt_ids,
+                                  untreated_tag = "vehicle") {
+
+  # Assertions
+  checkmate::assert_data_table(dt)
+  checkmate::assert_character(drugs_cotrt_ids, any.missing = FALSE)
+  checkmate::assert_character(conc_cotrt_ids, any.missing = FALSE)
+  checkmate::assert_true(length(drugs_cotrt_ids) == length(conc_cotrt_ids))
+
+  # Iterate through each pair of columns in drugs_cotrt_ids and conc_cotrt_ids;
+  # with empty inputs the loop is skipped and dt is returned unchanged
+  for (i in seq_along(drugs_cotrt_ids)) {
+    drug_col <- drugs_cotrt_ids[[i]]
+    conc_col <- conc_cotrt_ids[[i]]
+
+    # Only a single drug (excluding untreated_tag) qualifies
+    unique_drugs <- unique(dt[[drug_col]])
+    unique_drugs <- unique_drugs[!unique_drugs %in% untreated_tag]
+    if (length(unique_drugs) != 1) {
+      next
+    }
+
+    # Only two doses (0 and another value) qualify
+    unique_doses <- unique(dt[[conc_col]])
+    if (length(unique_doses) != 2 || !(0 %in% unique_doses)) {
+      next
+    }
+
+    # Create a new column named after the drug and fill it with the doses
+    new_column_name <- as.character(unique_drugs[1])
+    dt[, (new_column_name) := dt[[conc_col]]]
+
+    # Trailing "_<n>" of the drug column, or "" when there is no such suffix
+    drug_order <- gsub(".*_(\\d+)$|.*", "\\1", drug_col)
+
+    # Look up the related columns only when a suffix was found: without it the
+    # identifiers below are the primary-drug ones, which must not be removed
+    drug_cols <- if (nzchar(drug_order)) {
+      get_relevant_ids(
+        paste0(c("drug", "drug_name", "drug_moa", "concentration"), drug_order),
+        dt
+      )
+    } else {
+      character(0)
+    }
+
+    # Remove the current drug and concentration columns; de-duplicate because
+    # drug_cols may already contain drug_col and conc_col
+    cols_to_remove <- unique(c(drug_cols, drug_col, conc_col))
+    dt[, (cols_to_remove) := NULL]
+  }
+
+  dt
+}
@@ -67,4 +67,72 @@ test_that("collapse drugs works as expected", {
   expect_equal(dt_collapsed[[cols$drug]], "drug2")
   expect_equal(dt_collapsed[[cols$drug2]], "drug3")
   expect_equal(dt_collapsed[[cols$drug3]], "vehicle")
-})
+})
+
+
+test_that("process_perturbations works as expected", {
+  # Two co-treatments, each with a single drug and doses {0, x}:
+  # both pairs are replaced by dose columns named after the drug
+  two_cotrt <- data.table::data.table(
+    drug1 = c("vehicle", "drugA", "drugA"),
+    conc1 = c(0, 10, 0),
+    drug2 = c("vehicle", "drugB", "drugB"),
+    conc2 = c(0, 20, 0)
+  )
+  two_cotrt_res <- process_perturbations(
+    two_cotrt,
+    c("drug1", "drug2"),
+    c("conc1", "conc2")
+  )
+  expect_equal(
+    two_cotrt_res,
+    data.table::data.table(
+      drugA = c(0, 10, 0),
+      drugB = c(0, 20, 0)
+    )
+  )
+
+  # Same scenario with three co-treatments
+  drug_columns <- c("drug1", "drug2", "drug3")
+  conc_columns <- c("conc1", "conc2", "conc3")
+
+  three_cotrt <- data.table::data.table(
+    drug1 = c("vehicle", "drugA", "drugA"),
+    conc1 = c(0, 10, 0),
+    drug2 = c("vehicle", "drugB", "drugB"),
+    conc2 = c(0, 20, 0),
+    drug3 = c("vehicle", "drugC", "drugC"),
+    conc3 = c(0, 30, 0)
+  )
+  three_cotrt_res <- process_perturbations(three_cotrt, drug_columns, conc_columns)
+  expect_equal(
+    three_cotrt_res,
+    data.table::data.table(
+      drugA = c(0, 10, 0),
+      drugB = c(0, 20, 0),
+      drugC = c(0, 30, 0)
+    )
+  )
+
+  # The first pair holds two different drugs (and three doses), so it is
+  # left untouched while the remaining pairs are still converted
+  mixed_cotrt <- data.table::data.table(
+    drug1 = c("vehicle", "drugA", "drugB"),
+    conc1 = c(0, 10, 2),
+    drug2 = c("vehicle", "drugB", "drugB"),
+    conc2 = c(0, 20, 0),
+    drug3 = c("vehicle", "drugC", "drugC"),
+    conc3 = c(0, 30, 0)
+  )
+  mixed_cotrt_res <- process_perturbations(mixed_cotrt, drug_columns, conc_columns)
+  expect_equal(
+    mixed_cotrt_res,
+    data.table::data.table(
+      drug1 = c("vehicle", "drugA", "drugB"),
+      conc1 = c(0, 10, 2),
+      drugB = c(0, 20, 0),
+      drugC = c(0, 30, 0)
+    )
+  )
+})
+
+