Merge pull request #21 from pawelqs/develop

Develop
pawelqs · Jul 13, 2023 · 17e258f · 17e258f
2 parents 12f6864 + bb2db46
commit 17e258f
Show file tree

Hide file tree

Showing 92 changed files with 3,493 additions and 3,065 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cevomod
 Title: Cancer Evolution Models
-Version: 1.1.0
+Version: 2.0.0
 Authors@R: 
     person("Paweł", "Kuś", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-4367-9821"))
@@ -9,7 +9,7 @@ Description: Cancer Evolutionary Models. Set of methods facilitating the analysi
 License: GPL (>= 3)
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.2
+RoxygenNote: 7.2.3
 Suggests: 
     BMix,
     circlize,
@@ -61,4 +61,4 @@ Remotes:
     caravagnalab/mobster,
     caravagnalab/BMix
 LazyData: true
-URL: https://pawelqs.github.io/cevomod/
+URL: https://pawelqs.github.io/cevomod/, https://github.com/pawelqs/cevomod
diff --git a/NAMESPACE b/NAMESPACE
@@ -14,8 +14,6 @@ S3method(calc_SFS,cevo_snvs)
 S3method(calc_SFS,cevodata)
 S3method(calc_cumulative_tails,cevo_snvs)
 S3method(calc_cumulative_tails,cevodata)
-S3method(calc_mutation_frequencies,cevodata)
-S3method(cut_f_intervals,cevo_snvs)
 S3method(default_CNVs,cevodata)
 S3method(default_SNVs,cevodata)
 S3method(estimate_ITH,cevodata)
@@ -26,8 +24,8 @@ S3method(fit_mobster,cevodata)
 S3method(fit_powerlaw_tail_fixed,cevodata)
 S3method(fit_powerlaw_tail_optim,cevodata)
 S3method(get_CNVs_var_names,cevodata)
-S3method(get_Mf_1f,cevodata)
-S3method(get_SFS,cevodata)
+S3method(get_frequency_measure_name,cevo_snvs)
+S3method(get_frequency_measure_name,cevodata)
 S3method(get_model_names,cevodata)
 S3method(get_models,cevodata)
 S3method(get_mutation_rates,cevodata)
@@ -57,9 +55,6 @@ S3method(plot_mutations,cevodata)
 S3method(plot_mutations,tbl_df)
 S3method(plot_non_neutral_mutations_2D,cevodata)
 S3method(plot_private_shared_mutations,cevodata)
-S3method(plot_residuals_full_model,cevodata)
-S3method(plot_residuals_powerlaw_model,cevodata)
-S3method(plot_sampling_rate,cevodata)
 S3method(plot_sequencing_depth,cevo_snvs)
 S3method(plot_sequencing_depth,cevodata)
 S3method(print,cevo_colors)
@@ -77,12 +72,14 @@ export("default_CNVs<-")
 export("default_SNVs<-")
 export(CNVs)
 export(SNVs)
+export(SNVs_CNVs)
 export(active_models)
 export(add_CNV_data)
 export(add_SNV_data)
 export(add_patient_data)
 export(add_sample_data)
 export(annotate_mutation_contexts)
+export(annotate_normal_cn)
 export(as_cevo_snvs)
 export(calc_Mf_1f)
 export(calc_SFS)
@@ -97,6 +94,7 @@ export(count_neutral_tail_mutations)
 export(cut_f_intervals)
 export(default_CNVs)
 export(default_SNVs)
+export(dentro_2015_correction)
 export(estimate_ITH)
 export(evaluate_MC_runs)
 export(fill_na)
@@ -115,6 +113,8 @@ export(get_CNVs_var_names)
 export(get_Mf_1f)
 export(get_SFS)
 export(get_SNVs_wider)
+export(get_cevomod_verbosity)
+export(get_frequency_measure_name)
 export(get_model_names)
 export(get_models)
 export(get_mutation_rates)
@@ -129,6 +129,7 @@ export(hide_legend)
 export(identify_non_neutral_tail_mutations)
 export(impact_filter)
 export(init_cevodata)
+export(intervalize_mutation_frequencies)
 export(layer_lm_fits)
 export(layer_mutations)
 export(list_continuous_palettes)
@@ -162,9 +163,9 @@ export(scale_color_pnw)
 export(scale_fill_cevomod)
 export(scale_fill_pnw)
 export(set_cancer_type)
+export(set_cevomod_verbosity)
 export(shuffle)
 export(split_by)
-export(stat_SFS)
 export(stat_cumulative_tail)
 export(theme_ellie)
 export(to_clip)

diff --git a/NEWS.md b/NEWS.md
@@ -1,16 +1,17 @@
 
-# cevomod 1.x.x
+## cevomod 2.0.0
+* cevomod functions can now utilize VAF or CCF (Cancer Cell Fraction) as a measure
+  of mutation frequency. CCF is calculated using the formula introduced in [Dentro et al. *Principles of Reconstructing the Subclonal Architecture of Cancers* (2015)](https://doi.org/10.1101/cshperspect.a026625)
 
-**cevomod 1.1.0**
 
+## cevomod 1.1.0
 * cevodata export to [CliP](https://github.com/wwylab/CliP) implemented
 
-**cevomod 1.0.0**
 
+## cevomod 1.0.0
 * cevodata class implementation
 * fitting the power-law tails with exponent equal to 2 using $M(f) \sim 1/f$ statistic
 * fitting the power-law tails with optimized exponent
 * fitting subclones using mclust
 * fitting subclones using BMix
 * calculation of the evolutionary parameters using the [Williams et al. (2018)](https://doi.org/10.1038/s41588-018-0128-6) equations and the [MOBSTER code](https://github.com/caravagnalab/mobster/blob/master/R/evodynamics.R) [(Caravagna et al. (2020))](https://doi.org/10.1038/s41588-020-0675-5)
-
diff --git a/R/cevo_cnvs.R b/R/cevo_cnvs.R
@@ -0,0 +1,24 @@
+
+#' Annotate chromosome ploidies in CNV data
+#'
+#' Adds the normal_cn column to the CNV data. This column is required e.g. by
+#' the Dentro CCF calculation method. Requires the 'sex' column in the
+#' metadata. Males should be encoded by 'M' or 'male'. Safe to call repeatedly.
+#'
+#' @param object <cevodata> object
+#' @param which_cnvs Name of the CNVs slot
+#'
+#' @return <cevodata> object
+#' @export
+annotate_normal_cn <- function(object, which_cnvs = default_CNVs(object)) {
+  msg("Assuming human genome")
+  cnvs <- CNVs(object, which_cnvs) |>
+    select(-any_of("sex")) |> # a stale 'sex' would turn into sex.x/sex.y below
+    left_join(get_patient_sex(object), by = "sample_id") |>
+    mutate(
+      normal_cn = if_else(
+        .data$sex %in% c("male", "M") & .data$chrom %in% c("chrX", "chrY"), 1, 2
+      )
+    )
+  add_CNV_data(object, cnvs, name = which_cnvs)
+}
diff --git a/R/cevo_snvs.R b/R/cevo_snvs.R
@@ -30,7 +30,7 @@ as_cevo_snvs <- function(snvs) {
   snvs |>
     select(
       "sample_id", "chrom", "pos", "gene_symbol",
-      "ref", "alt", "ref_reads", "alt_reads", "VAF", "impact",
+      "ref", "alt", "ref_reads", "alt_reads", "impact", "VAF",
       everything()
     ) |>
     new_cevo_snvs()

diff --git a/R/cevodata-export.R b/R/cevodata-export.R
@@ -15,13 +15,16 @@
 #' @param cnvs_name name of the cnvs to use
 #' @param purity_column name of the metadata column with the purity estimates
 #'   to be used
+#' @param keep_chromosomes character vector of chromosome names to keep (by
+#'   default the autosomes chr1-chr22). CliP does not use sex chromosomes
 #' @export
 to_clip <- function(cd, out_dir = NULL,
                     snvs_name = default_SNVs(cd),
                     cnvs_name = default_CNVs(cd),
-                    purity_column = "purity") {
-  snvs <- get_clip_snvs(cd, snvs_name)
-  cnvs <- get_clip_cnvs(cd, cnvs_name)
+                    purity_column = "purity",
+                    keep_chromosomes = str_c("chr", 1:22)) {
+  snvs <- get_clip_snvs(cd, snvs_name, keep_chromosomes)
+  cnvs <- get_clip_cnvs(cd, cnvs_name, keep_chromosomes)
   purities <- get_clip_purities(cd, purity_column)
 
   clip_data <- lst(snvs, cnvs, purities) |>
@@ -36,7 +39,9 @@ to_clip <- function(cd, out_dir = NULL,
 
 
 
-get_clip_snvs <- function(cd, snvs_name = default_SNVs(cd)) {
+get_clip_snvs <- function(cd,
+                          snvs_name = default_SNVs(cd),
+                          keep_chromosomes = str_c("chr", 1:22)) {
   empty_clip_snvs <- tibble(
     chromosome_index = double(),
     position = integer(),
@@ -45,6 +50,7 @@ get_clip_snvs <- function(cd, snvs_name = default_SNVs(cd)) {
   )
 
   SNVs(cd, which = snvs_name) |>
+    filter(.data$chrom %in% keep_chromosomes) |>
     transmute(
       sample_id = parse_factor(.data$sample_id, levels = get_sample_ids(cd)),
       chromosome_index = chromosomes_to_int(.data$chrom),
@@ -59,7 +65,9 @@ get_clip_snvs <- function(cd, snvs_name = default_SNVs(cd)) {
 
 
 
-get_clip_cnvs <- function(cd, cnvs_name = default_CNVs(cd)) {
+get_clip_cnvs <- function(cd,
+                          cnvs_name = default_CNVs(cd),
+                          keep_chromosomes = str_c("chr", 1:22)) {
   empty_clip_cnvs <- tibble(
     chromosome_index = double(),
     start_position = double(),
@@ -70,6 +78,7 @@ get_clip_cnvs <- function(cd, cnvs_name = default_CNVs(cd)) {
   )
 
   CNVs(cd, which = cnvs_name) |>
+    filter(.data$chrom %in% keep_chromosomes) |>
     transmute(
       sample_id = parse_factor(.data$sample_id, levels = get_sample_ids(cd)),
       chromosome_index = chromosomes_to_int(.data$chrom),
@@ -79,6 +88,7 @@ get_clip_cnvs <- function(cd, cnvs_name = default_CNVs(cd)) {
       minor_cn = .data$minor_cn,
       total_cn = .data$total_cn
     ) |>
+    filter(.data$total_cn > 0) |>   # these records break CliP
     nest_by(.data$sample_id) |>
     complete(.data$sample_id, fill = list(data = list(empty_clip_cnvs))) |>
     deframe()
@@ -94,11 +104,23 @@ get_clip_purities <- function(cd, purity_column = "purity") {
 
 
 
+# Translate chromosome names into numeric indices: chr1-chr22 -> 1-22,
+# chrX -> 23, chrY -> 24, chrMT -> 25; any other name (or NA) gives NA.
+# A lookup replaces case_when(), whose as.integer() branch was evaluated for
+# every element and warned about coercion NAs whenever e.g. "chrX" was present.
+chromosomes_to_int <- function(chrom) {
+  known_chromosomes <- c(str_c("chr", 1:22), "chrX", "chrY", "chrMT")
+  # as.double() keeps the double type that mixing 23/24/25 in used to produce
+  as.double(match(chrom, known_chromosomes))
+}
+
+
+
 save_clip_files <- function(clip_data, out_dir) {
   if (!dir.exists(out_dir)) {
     dir.create(out_dir)
   }
-  imap(
+  iwalk(
     clip_data,
     function(x, sample_id) {
       write_tsv(x$snvs, file.path(out_dir, str_c(sample_id, ".snv.tsv")))

diff --git a/R/cevodata-getters.R b/R/cevodata-getters.R
@@ -133,6 +133,24 @@ get_SNVs_wider_intervals <- function(object, fill_na = NULL, bins = NULL) {
 }
 
 
+#' Get SNVs annotated with the CNV segments that contain them
+#' @param object cevodata object with SNVs and CNVs
+#' @export
+SNVs_CNVs <- function(object) {
+  # SNVs() and CNVs() are called without an assay name
+  join_CNVs(SNVs(object), CNVs(object))
+}
+
+
+# Left-join SNVs to the CNV row of the same sample and chromosome whose
+# [start, end] range contains the SNV position; at most one match is allowed.
+join_CNVs <- function(snvs, cnvs) {
+  # Column names stay quoted strings, as elsewhere in this file
+  segment_match <- join_by("sample_id", "chrom", "pos" >= "start", "pos" <= "end")
+  left_join(snvs, cnvs, by = segment_match, relationship = "many-to-one")
+}
+
+
 ## ------------------------------- Models ------------------------------------
 
 
@@ -228,7 +246,7 @@ fix_powerlaw_N_mutations <- function(models, cd, models_name) {
 }
 
 
-## ---------------------------------- Other -----------------------------------
+## ---------------------------------- CNVs -----------------------------------
 
 #' @rdname assays
 #' @export
@@ -265,6 +283,14 @@ get_CNVs_var_names.cevodata <- function(object, which = default_CNVs(object), ..
 }
 
 
+## ---------------------------------- Other -----------------------------------
+
+# Per-sample purity table: the sample_id and purity columns of the metadata
+get_purities <- function(cd) {
+  select(cd$metadata, "sample_id", "purity")
+}
+
+
 get_patients_data <- function(metadata) {
   patient_data_cols <- metadata |>
     group_by(.data$patient_id) |>
@@ -280,3 +306,9 @@ get_patients_data <- function(metadata) {
 get_sample_ids <- function(cd) {
   cd$metadata$sample_id
 }
+
+
+# Per-sample sex table: the sample_id and sex columns of the metadata
+get_patient_sex <- function(cd) {
+  select(cd$metadata, "sample_id", "sex")
+}
diff --git a/R/cevomod-global.R b/R/cevomod-global.R
@@ -0,0 +1,24 @@
+
+# Package-level mutable state; verbosity_level starts at 1
+cevomod_global <- list2env(list(verbosity_level = 1), parent = emptyenv())
+
+
+#' Get the verbosity level
+#'
+#' @return The verbosity level currently stored in the package state
+#' @export
+get_cevomod_verbosity <- function() cevomod_global[["verbosity_level"]]
+
+
+#' Change the verbosity level
+#' @param verbosity_level Verbosity level to use: 0 - silent, 1 - normal,
+#'   2 - detailed (in some cases)
+#' @return The previous verbosity level, invisibly
+#' @export
+set_cevomod_verbosity <- function(verbosity_level = 1) {
+  previous_level <- cevomod_global[["verbosity_level"]]
+  cevomod_global[["verbosity_level"]] <- verbosity_level
+  # Hand back the old level so a caller can restore it afterwards
+  invisible(previous_level)
+}
+
diff --git a/R/evolutionary_parameters.R b/R/evolutionary_parameters.R
@@ -37,7 +37,7 @@ get_mutation_rates.cevodata <- function(object, models_name = "powerlaw_fixed",
   # residuals <- get_residuals(object)
   # bin_widths <- residuals |>
   #   group_by(sample_id) |>
-  #   summarise(bin_width = get_interval_width(VAF_interval))
+  #   summarise(bin_width = get_interval_width(f_interval))
   #
   # min_VAF <- 0.2
   # max_VAF <- 0.8