From 1b7047ab43c4b5ec00632edaffc0ce7ce938babe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82?= Date: Wed, 19 Jul 2023 12:47:20 +0200 Subject: [PATCH 1/6] use_purity() implemented --- NAMESPACE | 1 + R/cevodata-construction.R | 27 +++++++++++++++++++++++++++ man/use_purity.Rd | 20 ++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 man/use_purity.Rd diff --git a/NAMESPACE b/NAMESPACE index 4d5fe1b..3228e74 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -169,6 +169,7 @@ export(split_by) export(stat_cumulative_tail) export(theme_ellie) export(to_clip) +export(use_purity) export(variant_classification_filter) import(dplyr) import(forcats) diff --git a/R/cevodata-construction.R b/R/cevodata-construction.R index bb4136e..63f1c24 100644 --- a/R/cevodata-construction.R +++ b/R/cevodata-construction.R @@ -257,6 +257,33 @@ add_sample_data.cevodata <- function(object, data, ...) { } +#' Choose purity measure +#' +#' metadata can contain purity measures in columns other than 'purity'. +#' This function can be used to set 'purity' values using values from requested +#' column +#' +#' @param cd object +#' @param name Name of the metadata column with chosen purity values +#' @param verbose Verbose? 
+#' @export +use_purity <- function(cd, name, verbose = get_cevomod_verbosity()) { + if (name %not in% names(cd$metadata)) { + stop( + "`name` should be a name of the column in the metadata tibble, ", + "which should be used as purity measure" + ) + } else { + msg("Using '", name, "' as default purity measure", verbose = verbose) + if (!is.null(cd$metadata[["purity"]])) { + cd$metadata$prev_purity <- cd$metadata$purity + } + cd$metadata$purity <- cd$metadata[[name]] + cd + } +} + + is_cevodata_singlepatient <- function(object) { n_patients <- count_patients(object) if (is.na(n_patients)) { diff --git a/man/use_purity.Rd b/man/use_purity.Rd new file mode 100644 index 0000000..90c13e3 --- /dev/null +++ b/man/use_purity.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cevodata-construction.R +\name{use_purity} +\alias{use_purity} +\title{Choose purity measure} +\usage{ +use_purity(cd, name, verbose = get_cevomod_verbosity()) +} +\arguments{ +\item{cd}{\if{html}{\out{}} object} + +\item{name}{Name of the metadata column with chosen purity values} + +\item{verbose}{Verbose?} +} +\description{ +\if{html}{\out{}} metadata can contain purity measures in columns other than 'purity'. 
+This function can be used to set 'purity' values using values from requested +column +} From a4119de4aee75777906d4accffc191b2729c51f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82?= Date: Wed, 19 Jul 2023 12:48:14 +0200 Subject: [PATCH 2/6] add_to_cevodata.cevo_ASCAT() implemented --- DESCRIPTION | 3 +- NAMESPACE | 3 + R/cevodata-readthis_integration.R | 64 +++++++++++++++++++ man/readthis-integration.Rd | 54 ++++++++++++++++ .../test-cevodata-readthis_integration.R | 12 ++++ 5 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 R/cevodata-readthis_integration.R create mode 100644 man/readthis-integration.Rd create mode 100644 tests/testthat/test-cevodata-readthis_integration.R diff --git a/DESCRIPTION b/DESCRIPTION index 347c431..f4b366d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,8 @@ Suggests: shinyWidgets, testthat (>= 3.0.0), tidyverse, - vdiffr + vdiffr, + readthis Config/testthat/edition: 3 VignetteBuilder: knitr Imports: diff --git a/NAMESPACE b/NAMESPACE index 3228e74..ae6f115 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ S3method(add_CNV_data,cevodata) S3method(add_SNV_data,cevodata) S3method(add_patient_data,cevodata) S3method(add_sample_data,cevodata) +S3method(add_to_cevodata,cevo_ASCAT) S3method(calc_Mf_1f,cevo_snvs) S3method(calc_Mf_1f,cevodata) S3method(calc_SFS,cevo_snvs) @@ -76,8 +77,10 @@ export(SNVs_CNVs) export(active_models) export(add_CNV_data) export(add_SNV_data) +export(add_data) export(add_patient_data) export(add_sample_data) +export(add_to_cevodata) export(annotate_mutation_contexts) export(annotate_normal_cn) export(as_cevo_snvs) diff --git a/R/cevodata-readthis_integration.R b/R/cevodata-readthis_integration.R new file mode 100644 index 0000000..25344f9 --- /dev/null +++ b/R/cevodata-readthis_integration.R @@ -0,0 +1,64 @@ + +#' readthis integration +#' +#' @description +#' [readthis](https://github.com/pawelqs/readthis) package may be used to easily +#' read the data from some popular 
mutation callers into R environment. readthis +#' functions can be supplied not only with the single file paths, but also with +#' lists of files or even paths to the directories with files to be loaded (and +#' cevodata object is to store the data from many samples!) +#' +#' readthis functions return tibbles or list of tibbles. These tibbles/ +#' objects usually are instances of *cevo_* S3 classes. cevomod +#' implements methods that allow to add these types of data to the cevodata +#' objects conveniently. +#' +#' @param cd object +#' @param data Object read with readthis functions +#' @param name Name for the data +#' @param verbose Verbose? +#' @param ... Other arguments +#' +#' @examples +#' # library(cevomod) +#' +#' ascat_dir <- system.file("extdata", "ASCAT", package = "readthis") +#' ascat <- readthis::read_ascat_files(ascat_dir) +#' cd <- init_cevodata("Test dataset") |> +#' add_data(ascat) +#' +#' @name readthis-integration +NULL + + + +#' @describeIn readthis-integration add_data() function takes cevodata as the +#' first argument, so it is a preferred method for adding data in R pipelines. +#' @export +add_data <- function(cd, data, ...) { + add_to_cevodata(data, cd) +} + + +#' @describeIn readthis-integration add_to_cevodata() is a generic with a set +#' of methods for different classes of `data`. These methods are called by +#' add_data() function. +#' @export +add_to_cevodata <- function(data, cd, name, ...) { + UseMethod("add_to_cevodata") +} + + +#' @export +add_to_cevodata.cevo_ASCAT <- function(data, cd, + name = "ASCAT", + verbose = get_cevomod_verbosity(), + ...) 
{ + sample_data <- data$sample_statistics |> + mutate(ascat_purity = 1 - .data$normal_contamination) + cd |> + add_CNV_data(data$cnvs, name = name) |> + add_sample_data(sample_data) |> + use_purity("ascat_purity") +} + diff --git a/man/readthis-integration.Rd b/man/readthis-integration.Rd new file mode 100644 index 0000000..c404f2f --- /dev/null +++ b/man/readthis-integration.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cevodata-readthis_integration.R +\name{readthis-integration} +\alias{readthis-integration} +\alias{add_data} +\alias{add_to_cevodata} +\title{readthis integration} +\usage{ +add_data(cd, data, ...) + +add_to_cevodata(data, cd, name, ...) +} +\arguments{ +\item{cd}{\if{html}{\out{}} object} + +\item{data}{Object read with readthis functions} + +\item{...}{Other arguments} + +\item{name}{Name for the data} + +\item{verbose}{Verbose?} +} +\description{ +\href{https://github.com/pawelqs/readthis}{readthis} package may be used to easily +read the data from some popular mutation callers into R environment. readthis +functions can be supplied not only with the single file paths, but also with +lists of files or even paths to the directories with files to be loaded (and +cevodata object is to store the data from many samples!) + +readthis functions return tibbles or list of tibbles. These tibbles/ +objects usually are instances of \emph{cevo_} S3 classes. cevomod +implements methods that allow to add these types of data to the cevodata +objects conveniently. +} +\section{Functions}{ +\itemize{ +\item \code{add_data()}: add_data() function takes cevodata as the +first argument, so it is a preferred method for adding data in R pipelines. + +\item \code{add_to_cevodata()}: add_to_cevodata() is a generic with a set +of methods for different classes of \code{data}. These methods are called by +add_data() function. 
+ +}} +\examples{ +# library(cevomod) + +ascat_dir <- system.file("extdata", "ASCAT", package = "readthis") +ascat <- readthis::read_ascat_files(ascat_dir) +cd <- init_cevodata("Test dataset") |> + add_data(ascat) + +} diff --git a/tests/testthat/test-cevodata-readthis_integration.R b/tests/testthat/test-cevodata-readthis_integration.R new file mode 100644 index 0000000..463196a --- /dev/null +++ b/tests/testthat/test-cevodata-readthis_integration.R @@ -0,0 +1,12 @@ +test_that("adding ASCAT data works", { + ascat_dir <- system.file("extdata", "ASCAT", package = "readthis") + data <- readthis::read_ascat_files(ascat_dir, sample_id_pattern = "(?<=ASCAT\\/)[:alnum:]*(?=\\.)") + cd <- init_cevodata("Test dataset") |> + add_data(data) + expect_s3_class(cd, "cevodata") + expect_s3_class(CNVs(cd), "tbl") + expect_equal(cd$active_CNVs, "ASCAT") + expect_equal(dim(CNVs(cd)), c(20, 8)) + expect_equal(cd$metadata$purity, c(0.99322, 0.99322)) + expect_equal(cd$metadata$purity, cd$metadata$ascat_purity) +}) From cb3d5415e26a44f17da131054890d965ae0ded26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82?= Date: Mon, 24 Jul 2023 15:26:41 +0200 Subject: [PATCH 3/6] readthis integration for FACETS, Mutect and Strelka --- NAMESPACE | 3 + R/cevodata-readthis_integration.R | 58 ++++++++++++++++++- .../test-cevodata-readthis_integration.R | 53 +++++++++++++++++ 3 files changed, 113 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index ae6f115..87bc536 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,9 @@ S3method(add_SNV_data,cevodata) S3method(add_patient_data,cevodata) S3method(add_sample_data,cevodata) S3method(add_to_cevodata,cevo_ASCAT) +S3method(add_to_cevodata,cevo_FACETS) +S3method(add_to_cevodata,cevo_Mutect) +S3method(add_to_cevodata,cevo_Strelka) S3method(calc_Mf_1f,cevo_snvs) S3method(calc_Mf_1f,cevodata) S3method(calc_SFS,cevo_snvs) diff --git a/R/cevodata-readthis_integration.R b/R/cevodata-readthis_integration.R index 25344f9..96cbd79 100644 --- 
a/R/cevodata-readthis_integration.R +++ b/R/cevodata-readthis_integration.R @@ -59,6 +59,62 @@ add_to_cevodata.cevo_ASCAT <- function(data, cd, cd |> add_CNV_data(data$cnvs, name = name) |> add_sample_data(sample_data) |> - use_purity("ascat_purity") + use_purity("ascat_purity", verbose = verbose) } + +#' @export +add_to_cevodata.cevo_FACETS <- function(data, cd, + name = "FACETS", + verbose = get_cevomod_verbosity(), + ...) { + cnvs <- data |> + select(-"Purity", -"Ploidy") + sample_data <- data |> + select("sample_id", facets_purity = "Purity", facets_ploidy = "Ploidy") |> + unique() + cd |> + add_CNV_data(data, name = name) |> + add_sample_data(sample_data) |> + use_purity("facets_purity", verbose = verbose) +} + + +#' @export +add_to_cevodata.cevo_Mutect <- function(data, cd, name = "Mutect", ...) { + patient_ids_present <- "patient_id" %in% names(data) + + if (patient_ids_present) { + sample_data <- data |> + select("patient_id", "sample_id") |> + unique() + data$patient_id <- NULL + } + + cd <- add_SNV_data(cd, data, name = name) + if (patient_ids_present) { + cd <- add_sample_data(cd, sample_data) + } + + cd +} + + +#' @export +add_to_cevodata.cevo_Strelka <- function(data, cd, name = "Strelka", ...) 
{ + patient_ids_present <- "patient_id" %in% names(data) + + if (patient_ids_present) { + sample_data <- data |> + select("patient_id", "sample_id") |> + unique() + data$patient_id <- NULL + } + + cd <- add_SNV_data(cd, data, name = name) + if (patient_ids_present) { + cd <- add_sample_data(cd, sample_data) + } + + cd +} diff --git a/tests/testthat/test-cevodata-readthis_integration.R b/tests/testthat/test-cevodata-readthis_integration.R index 463196a..99e9a28 100644 --- a/tests/testthat/test-cevodata-readthis_integration.R +++ b/tests/testthat/test-cevodata-readthis_integration.R @@ -10,3 +10,56 @@ test_that("adding ASCAT data works", { expect_equal(cd$metadata$purity, c(0.99322, 0.99322)) expect_equal(cd$metadata$purity, cd$metadata$ascat_purity) }) + + + +test_that("adding FACETS data works", { + facets_dir <- system.file("extdata", "FACETS", package = "readthis") + data <- readthis::read_facets_cnvs(facets_dir) + cd <- init_cevodata("Test dataset") |> + add_data(data) + expect_s3_class(cd, "cevodata") + expect_s3_class(CNVs(cd), "tbl") + expect_equal(cd$active_CNVs, "FACETS") + expect_equal(dim(CNVs(cd)), c(128, 18)) + expect_equal(cd$metadata$purity, c(0.3, 0.3)) + expect_equal(cd$metadata$purity, cd$metadata$facets_purity) +}) + + + +test_that("adding Mutect2 data works", { + path <- system.file("extdata", "Mutect", package = "readthis") + data <- readthis::read_mutect_snvs( + path, + patient_id_pattern = "(?<=Mutect\\/)[:alnum:]*(?=\\.)", + verbose = FALSE + ) + cd <- init_cevodata("Test dataset") |> + add_data(data) + expect_s3_class(cd, "cevodata") + expect_s3_class(SNVs(cd), "tbl") + expect_equal(cd$active_SNVs, "Mutect") + expect_equal(dim(SNVs(cd)), c(16, 14)) + expect_equal(cd$metadata$sample_id, c("S1_L1", "S1_P1", "S2_L1", "S2_P1")) + expect_equal(cd$metadata$patient_id, c("S1", "S1", "S2", "S2")) +}) + + + +test_that("adding Strelka data works", { + path <- system.file("extdata", "Strelka", package = "readthis") + data <- 
readthis::read_strelka_somatic_snvs( + path, + patient_id_pattern = "(?<=Strelka\\/)[:alnum:]*(?=\\.)", + verbose = FALSE + ) |> + mutate(sample_id = str_c(patient_id, sample_id, sep = "_")) + cd <- init_cevodata("Test dataset") |> + add_data(data) + expect_s3_class(cd, "cevodata") + expect_s3_class(SNVs(cd), "tbl") + expect_equal(cd$active_SNVs, "Strelka") + expect_equal(dim(SNVs(cd)), c(18, 11)) + expect_equal(cd$metadata$sample_id, c("S1_TUMOR", "S2_TUMOR")) +}) From e3f4a06b530d35eec50ee4395a3bae9e34ce93ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82?= Date: Tue, 25 Jul 2023 17:25:57 +0200 Subject: [PATCH 4/6] documentation fixes --- R/cevodata-readthis_integration.R | 12 +++++++++--- man/readthis-integration.Rd | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/cevodata-readthis_integration.R b/R/cevodata-readthis_integration.R index 96cbd79..f1bb094 100644 --- a/R/cevodata-readthis_integration.R +++ b/R/cevodata-readthis_integration.R @@ -44,7 +44,7 @@ add_data <- function(cd, data, ...) { #' of methods for different classes of `data`. These methods are called by #' add_data() function. #' @export -add_to_cevodata <- function(data, cd, name, ...) { +add_to_cevodata <- function(data, cd, name, verbose, ...) { UseMethod("add_to_cevodata") } @@ -81,7 +81,10 @@ add_to_cevodata.cevo_FACETS <- function(data, cd, #' @export -add_to_cevodata.cevo_Mutect <- function(data, cd, name = "Mutect", ...) { +add_to_cevodata.cevo_Mutect <- function(data, cd, + name = "Mutect", + verbose = get_cevomod_verbosity(), + ...) { patient_ids_present <- "patient_id" %in% names(data) if (patient_ids_present) { @@ -101,7 +104,10 @@ add_to_cevodata.cevo_Mutect <- function(data, cd, name = "Mutect", ...) { #' @export -add_to_cevodata.cevo_Strelka <- function(data, cd, name = "Strelka", ...) { +add_to_cevodata.cevo_Strelka <- function(data, cd, + name = "Strelka", + verbose = get_cevomod_verbosity(), + ...) 
{ patient_ids_present <- "patient_id" %in% names(data) if (patient_ids_present) { diff --git a/man/readthis-integration.Rd b/man/readthis-integration.Rd index c404f2f..eac6306 100644 --- a/man/readthis-integration.Rd +++ b/man/readthis-integration.Rd @@ -8,7 +8,7 @@ \usage{ add_data(cd, data, ...) -add_to_cevodata(data, cd, name, ...) +add_to_cevodata(data, cd, name, verbose, ...) } \arguments{ \item{cd}{\if{html}{\out{}} object} From 84e5c6ac3382b183928a8e7755b3d9606ed3124e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82?= Date: Tue, 25 Jul 2023 17:26:35 +0200 Subject: [PATCH 5/6] v2.1.0, pkgdown site update --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ README.md | 6 +++--- _pkgdown.yml | 1 + vignettes/get_started.Rmd | 2 ++ 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f4b366d..eecec5b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cevomod Title: Cancer Evolution Models -Version: 2.0.0 +Version: 2.1.0 Authors@R: person("Paweł", "Kuś", , "kpawel2210@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-4367-9821")) diff --git a/NEWS.md b/NEWS.md index eb0a0eb..cae15e1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ +## cevomod 2.1.0 +* cevomod is integrated with a helper [readthis](https://pawelqs.github.io/readthis/index.html) package, designed for bulk reading of variant files from algorithms such as Mutect2, Strelka, ASCAT, or FACETS, in the cevomod-friendly data format. Objects returned by `readthis::read_*()` functions can be added to the cevodata object using a general `add_data()` function. + + ## cevomod 2.0.0 * cevomod functions can no utilize VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. CCF is calculated using the formula introduced in [Dentro et al. 
*Principles of Reconstructing the Subclonal Architecture of Cancers* (2015)](https://doi.org/10.1101/cshperspect.a026625) diff --git a/README.md b/README.md index 2168b82..c1407d6 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,9 @@ devtools::install_github("pawelqs/cevomod") ``` -## Chnages in version 2.0.0 - -Starting with version 2.0.0, cevomod can use either VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. CCF is a measure of mutation frequency corrected for tumor purity and copy number alterations. CCF can be calculated prior to mutation frequency intervalization using the `calc_mutation_frequencies()` function and requires information on total copy number in tumor and normal tissue and sample purity (tumor cell content). See the Vignettes for more examples. +## Last changes +* **v2.1.0** - cevomod is integrated with a helper [readthis](https://pawelqs.github.io/readthis/index.html) package, designed for bulk reading of variant files from algorithms such as Mutect2, Strelka, ASCAT, or FACETS, in the cevomod-friendly data format. Objects returned by `readthis::read_*()` functions can be added to the cevodata object using a general `add_data()` function. +* **v2.0.0** - Starting with version 2.0.0, cevomod can use either VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. CCF is a measure of mutation frequency corrected for tumor purity and copy number alterations. CCF can be calculated prior to mutation frequency intervalization using the `calc_mutation_frequencies()` function and requires information on total copy number in tumor and normal tissue and sample purity (tumor cell content). See the Vignettes for more examples. 
To see the previous changes in the package see the [Changelog](https://pawelqs.github.io/cevomod/news/index.html) diff --git a/_pkgdown.yml b/_pkgdown.yml index 441eed3..6bf5b67 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -21,6 +21,7 @@ reference: - starts_with("add_") - starts_with("default_") - set_cancer_type + - use_purity - title: cevodata transformations - contents: - filter.cevodata diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd index 89ef295..2ed9d17 100644 --- a/vignettes/get_started.Rmd +++ b/vignettes/get_started.Rmd @@ -58,6 +58,8 @@ cd `name` can be any string that is informative for the user. +*To facilitate the use of cevomod with the data from popular variant callers such as Mutect2, Strelka2, ASCAT, or FACETS, we have implemented a* [readthis](https://pawelqs.github.io/readthis/index.html) *package. readthis functions are designed for bulk reading of many output variant files (they accept a path to a single file, named vector of file paths, or a path to a directory containing many files). Data objects read with readthis functions can be added to the cevodata object with a single call of the general* `add_data()` *function. For more information see the* [readthis page](https://pawelqs.github.io/readthis/index.html). + ## Variant Frequency Spectra From 107e5c07842adbd1b5d21d4f998118106c4a5c96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82?= Date: Tue, 25 Jul 2023 17:30:09 +0200 Subject: [PATCH 6/6] pawelqs/readthis added to DESCRIPTION/Remotes --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index eecec5b..afbe08c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -60,6 +60,7 @@ Depends: R (>= 2.10) Remotes: caravagnalab/mobster, - caravagnalab/BMix + caravagnalab/BMix, + pawelqs/readthis LazyData: true URL: https://pawelqs.github.io/cevomod/, https://github.com/pawelqs/cevomod