diff --git a/DESCRIPTION b/DESCRIPTION index 347c431..afbe08c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cevomod Title: Cancer Evolution Models -Version: 2.0.0 +Version: 2.1.0 Authors@R: person("Paweł", "Kuś", , "kpawel2210@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-4367-9821")) @@ -29,7 +29,8 @@ Suggests: shinyWidgets, testthat (>= 3.0.0), tidyverse, - vdiffr + vdiffr, + readthis Config/testthat/edition: 3 VignetteBuilder: knitr Imports: @@ -59,6 +60,7 @@ Depends: R (>= 2.10) Remotes: caravagnalab/mobster, - caravagnalab/BMix + caravagnalab/BMix, + pawelqs/readthis LazyData: true URL: https://pawelqs.github.io/cevomod/, https://github.com/pawelqs/cevomod diff --git a/NAMESPACE b/NAMESPACE index 4d5fe1b..87bc536 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,10 @@ S3method(add_CNV_data,cevodata) S3method(add_SNV_data,cevodata) S3method(add_patient_data,cevodata) S3method(add_sample_data,cevodata) +S3method(add_to_cevodata,cevo_ASCAT) +S3method(add_to_cevodata,cevo_FACETS) +S3method(add_to_cevodata,cevo_Mutect) +S3method(add_to_cevodata,cevo_Strelka) S3method(calc_Mf_1f,cevo_snvs) S3method(calc_Mf_1f,cevodata) S3method(calc_SFS,cevo_snvs) @@ -76,8 +80,10 @@ export(SNVs_CNVs) export(active_models) export(add_CNV_data) export(add_SNV_data) +export(add_data) export(add_patient_data) export(add_sample_data) +export(add_to_cevodata) export(annotate_mutation_contexts) export(annotate_normal_cn) export(as_cevo_snvs) @@ -169,6 +175,7 @@ export(split_by) export(stat_cumulative_tail) export(theme_ellie) export(to_clip) +export(use_purity) export(variant_classification_filter) import(dplyr) import(forcats) diff --git a/NEWS.md b/NEWS.md index eb0a0eb..cae15e1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ +## cevomod 2.1.0 +* cevomod is integrated with a helper [readthis](https://pawelqs.github.io/readthis/index.html) package, designed for bulk reading of variant files from algorithms such as Mutect2, Strelka, ASCAT, or 
FACETS, in the cevomod-friendly data format. Objects returned by `readthis::read_*()` functions can be added to the cevodata object using a general `add_data()` function. + + ## cevomod 2.0.0 * cevomod functions can no utilize VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. CCF is calculated using the formula introduced in [Dentro et al. *Principles of Reconstructing the Subclonal Architecture of Cancers* (2015)](https://doi.org/10.1101/cshperspect.a026625) diff --git a/R/cevodata-construction.R b/R/cevodata-construction.R index bb4136e..63f1c24 100644 --- a/R/cevodata-construction.R +++ b/R/cevodata-construction.R @@ -257,6 +257,33 @@ add_sample_data.cevodata <- function(object, data, ...) { } +#' Choose purity measure +#' +#' `cevodata` metadata can contain purity measures in columns other than 'purity'. +#' This function can be used to set 'purity' values using values from requested +#' column +#' +#' @param cd `cevodata` object +#' @param name Name of the metadata column with chosen purity values +#' @param verbose Verbose? 
+#' @export +use_purity <- function(cd, name, verbose = get_cevomod_verbosity()) { + if (name %not in% names(cd$metadata)) { + stop( + "`name` should be a name of the column in the metadata tibble, ", + "which should be used as purity measure" + ) + } else { + msg("Using '", name, "' as default purity measure", verbose = verbose) + if (!is.null(cd$metadata[["purity"]])) { + cd$metadata$prev_purity <- cd$metadata$purity # back up the values about to be overwritten + } + cd$metadata$purity <- cd$metadata[[name]] + cd + } +} + + +is_cevodata_singlepatient <- function(object) { n_patients <- count_patients(object) if (is.na(n_patients)) { diff --git a/R/cevodata-readthis_integration.R b/R/cevodata-readthis_integration.R new file mode 100644 index 0000000..f1bb094 --- /dev/null +++ b/R/cevodata-readthis_integration.R @@ -0,0 +1,126 @@ + +#' readthis integration +#' +#' @description +#' The [readthis](https://github.com/pawelqs/readthis) package may be used to easily +#' read the data from some popular mutation callers into the R environment. readthis +#' functions can be supplied not only with single file paths, but also with +#' lists of files or even paths to directories with files to be loaded (and +#' a cevodata object is meant to store the data from many samples!) +#' +#' readthis functions return tibbles or lists of tibbles. These +#' objects usually are instances of *cevo_* S3 classes. cevomod +#' implements methods that allow adding these types of data to the cevodata +#' objects conveniently. +#' +#' @param cd `cevodata` object +#' @param data Object read with readthis functions +#' @param name Name for the data +#' @param verbose Verbose? +#' @param ... 
Other arguments +#' +#' @examplesIf requireNamespace("readthis", quietly = TRUE) +#' # library(cevomod) +#' +#' ascat_dir <- system.file("extdata", "ASCAT", package = "readthis") +#' ascat <- readthis::read_ascat_files(ascat_dir) +#' cd <- init_cevodata("Test dataset") |> +#' add_data(ascat) +#' +#' @name readthis-integration +NULL + + + +#' @describeIn readthis-integration add_data() takes cevodata as the first +#' argument, so it pipes well; `...` is forwarded to add_to_cevodata(). +#' @export +add_data <- function(cd, data, ...) { + add_to_cevodata(data, cd, ...) +} + + +#' @describeIn readthis-integration add_to_cevodata() is a generic with a set +#' of methods for different classes of `data`. These methods are called by +#' add_data() function. +#' @export +add_to_cevodata <- function(data, cd, name, verbose, ...) { + UseMethod("add_to_cevodata") +} + + +#' @export +add_to_cevodata.cevo_ASCAT <- function(data, cd, + name = "ASCAT", + verbose = get_cevomod_verbosity(), + ...) { + sample_data <- data$sample_statistics |> + mutate(ascat_purity = 1 - .data$normal_contamination) + cd |> + add_CNV_data(data$cnvs, name = name) |> + add_sample_data(sample_data) |> + use_purity("ascat_purity", verbose = verbose) +} + + +#' @export +add_to_cevodata.cevo_FACETS <- function(data, cd, + name = "FACETS", + verbose = get_cevomod_verbosity(), + ...) { + cnvs <- data |> + select(-"Purity", -"Ploidy") + sample_data <- data |> + select("sample_id", facets_purity = "Purity", facets_ploidy = "Ploidy") |> + unique() + cd |> + add_CNV_data(data, name = name) |> # NOTE(review): `cnvs` (computed above) is unused and the full `data` is stored; confirm which is intended + add_sample_data(sample_data) |> + use_purity("facets_purity", verbose = verbose) +} + + +#' @export +add_to_cevodata.cevo_Mutect <- function(data, cd, + name = "Mutect", + verbose = get_cevomod_verbosity(), + ...) 
{ + patient_ids_present <- "patient_id" %in% names(data) + + if (patient_ids_present) { + sample_data <- data |> + select("patient_id", "sample_id") |> + unique() + data$patient_id <- NULL + } + + cd <- add_SNV_data(cd, data, name = name) + if (patient_ids_present) { + cd <- add_sample_data(cd, sample_data) + } + + cd +} + + +#' @export +add_to_cevodata.cevo_Strelka <- function(data, cd, + name = "Strelka", + verbose = get_cevomod_verbosity(), # NOTE(review): `verbose` is not used in this method body + ...) { + patient_ids_present <- "patient_id" %in% names(data) # same steps as the cevo_Mutect method + + if (patient_ids_present) { + sample_data <- data |> + select("patient_id", "sample_id") |> + unique() + data$patient_id <- NULL # patient_id is kept in sample_data; drop it from the SNV table + } + + cd <- add_SNV_data(cd, data, name = name) + if (patient_ids_present) { + cd <- add_sample_data(cd, sample_data) # register the patient_id/sample_id pairs as sample data + } + + cd +} diff --git a/README.md b/README.md index 2168b82..c1407d6 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,9 @@ devtools::install_github("pawelqs/cevomod") ``` -## Chnages in version 2.0.0 - -Starting with version 2.0.0, cevomod can use either VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. CCF is a measure of mutation frequency corrected for tumor purity and copy number alterations. CCF can be calculated prior to mutation frequency intervalization using the `calc_mutation_frequencies()` function and requires information on total copy number in tumor and normal tissue and sample purity (tumor cell content). See the Vignettes for more examples. +## Last changes +* **v2.1.0** - cevomod is integrated with a helper [readthis](https://pawelqs.github.io/readthis/index.html) package, designed for bulk reading of variant files from algorithms such as Mutect2, Strelka, ASCAT, or FACETS, in the cevomod-friendly data format. Objects returned by `readthis::read_*()` functions can be added to the cevodata object using a general `add_data()` function. +* **v2.0.0** - Starting with version 2.0.0, cevomod can use either VAF or CCF (Cancer Cell Fraction) as a measure of mutation frequency. 
CCF is a measure of mutation frequency corrected for tumor purity and copy number alterations. CCF can be calculated prior to mutation frequency intervalization using the `calc_mutation_frequencies()` function and requires information on total copy number in tumor and normal tissue and sample purity (tumor cell content). See the Vignettes for more examples. To see the previous changes in the package see the [Changelog](https://pawelqs.github.io/cevomod/news/index.html) diff --git a/_pkgdown.yml b/_pkgdown.yml index 441eed3..6bf5b67 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -21,6 +21,7 @@ reference: - starts_with("add_") - starts_with("default_") - set_cancer_type + - use_purity - title: cevodata transformations - contents: - filter.cevodata diff --git a/man/readthis-integration.Rd b/man/readthis-integration.Rd new file mode 100644 index 0000000..eac6306 --- /dev/null +++ b/man/readthis-integration.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cevodata-readthis_integration.R +\name{readthis-integration} +\alias{readthis-integration} +\alias{add_data} +\alias{add_to_cevodata} +\title{readthis integration} +\usage{ +add_data(cd, data, ...) + +add_to_cevodata(data, cd, name, verbose, ...) +} +\arguments{ +\item{cd}{\if{html}{\out{}} object} + +\item{data}{Object read with readthis functions} + +\item{...}{Other arguments} + +\item{name}{Name for the data} + +\item{verbose}{Verbose?} +} +\description{ +\href{https://github.com/pawelqs/readthis}{readthis} package may be used to easily +read the data from some popular mutation callers into R environment. readthis +functions can be supplied not only with the single file paths, but also with +lists of files or even paths to the directories with files to be loaded (and +cevodata object is to store the data from many samples!) + +readthis functions return tibbles or list of tibbles. These tibbles/ +objects usually are instances of \emph{cevo_} S3 classes. 
cevomod +implements methods that allow to add these types of data to the cevodata +objects conveniently. +} +\section{Functions}{ +\itemize{ +\item \code{add_data()}: add_data() function takes cevodata as the +first argument, so it is a preferred method for adding data in R pipelines. + +\item \code{add_to_cevodata()}: add_to_cevodata() is a generic with a set +of methods for different classes of \code{data}. These methods are called by +add_data() function. + +}} +\examples{ +# library(cevomod) + +ascat_dir <- system.file("extdata", "ASCAT", package = "readthis") +ascat <- readthis::read_ascat_files(ascat_dir) +cd <- init_cevodata("Test dataset") |> + add_data(ascat) + +} diff --git a/man/use_purity.Rd b/man/use_purity.Rd new file mode 100644 index 0000000..90c13e3 --- /dev/null +++ b/man/use_purity.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cevodata-construction.R +\name{use_purity} +\alias{use_purity} +\title{Choose purity measure} +\usage{ +use_purity(cd, name, verbose = get_cevomod_verbosity()) +} +\arguments{ +\item{cd}{\if{html}{\out{}} object} + +\item{name}{Name of the metadata column with chosen purity values} + +\item{verbose}{Verbose?} +} +\description{ +\if{html}{\out{}} metadata can contain purity measures in columns other than 'purity'. 
+This function can be used to set 'purity' values using values from requested +column +} diff --git a/tests/testthat/test-cevodata-readthis_integration.R b/tests/testthat/test-cevodata-readthis_integration.R new file mode 100644 index 0000000..99e9a28 --- /dev/null +++ b/tests/testthat/test-cevodata-readthis_integration.R @@ -0,0 +1,65 @@ +test_that("adding ASCAT data works", { + ascat_dir <- system.file("extdata", "ASCAT", package = "readthis") + data <- readthis::read_ascat_files(ascat_dir, sample_id_pattern = "(?<=ASCAT\\/)[:alnum:]*(?=\\.)") + cd <- init_cevodata("Test dataset") |> + add_data(data) + expect_s3_class(cd, "cevodata") + expect_s3_class(CNVs(cd), "tbl") + expect_equal(cd$active_CNVs, "ASCAT") + expect_equal(dim(CNVs(cd)), c(20, 8)) + expect_equal(cd$metadata$purity, c(0.99322, 0.99322)) + expect_equal(cd$metadata$purity, cd$metadata$ascat_purity) +}) + + + +test_that("adding FACETS data works", { + facets_dir <- system.file("extdata", "FACETS", package = "readthis") + data <- readthis::read_facets_cnvs(facets_dir) + cd <- init_cevodata("Test dataset") |> + add_data(data) + expect_s3_class(cd, "cevodata") + expect_s3_class(CNVs(cd), "tbl") + expect_equal(cd$active_CNVs, "FACETS") + expect_equal(dim(CNVs(cd)), c(128, 18)) + expect_equal(cd$metadata$purity, c(0.3, 0.3)) + expect_equal(cd$metadata$purity, cd$metadata$facets_purity) +}) + + + +test_that("adding Mutect2 data works", { + path <- system.file("extdata", "Mutect", package = "readthis") + data <- readthis::read_mutect_snvs( + path, + patient_id_pattern = "(?<=Mutect\\/)[:alnum:]*(?=\\.)", + verbose = FALSE + ) + cd <- init_cevodata("Test dataset") |> + add_data(data) + expect_s3_class(cd, "cevodata") + expect_s3_class(SNVs(cd), "tbl") + expect_equal(cd$active_SNVs, "Mutect") + expect_equal(dim(SNVs(cd)), c(16, 14)) + expect_equal(cd$metadata$sample_id, c("S1_L1", "S1_P1", "S2_L1", "S2_P1")) + expect_equal(cd$metadata$patient_id, c("S1", "S1", "S2", "S2")) +}) + + + +test_that("adding Strelka 
data works", { + path <- system.file("extdata", "Strelka", package = "readthis") + data <- readthis::read_strelka_somatic_snvs( + path, + patient_id_pattern = "(?<=Strelka\\/)[:alnum:]*(?=\\.)", + verbose = FALSE + ) |> + mutate(sample_id = str_c(patient_id, sample_id, sep = "_")) + cd <- init_cevodata("Test dataset") |> + add_data(data) + expect_s3_class(cd, "cevodata") + expect_s3_class(SNVs(cd), "tbl") + expect_equal(cd$active_SNVs, "Strelka") + expect_equal(dim(SNVs(cd)), c(18, 11)) + expect_equal(cd$metadata$sample_id, c("S1_TUMOR", "S2_TUMOR")) +}) diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd index 89ef295..2ed9d17 100644 --- a/vignettes/get_started.Rmd +++ b/vignettes/get_started.Rmd @@ -58,6 +58,8 @@ cd `name` can be any string that is informative for the user. +*To facilitate the use of cevomod with the data from popular variant callers such as Mutect2, Strelka2, ASCAT, or FACETS, we have implemented a* [readthis](https://pawelqs.github.io/readthis/index.html) *package. readthis functions are designed for bulk reading of many output variant files (they accept a path to a single file, a named vector of file paths, or a path to a directory containing many files). Data objects read with readthis functions can be added to the cevodata object with a single call of the general* `add_data()` *function. For more information see the* [readthis page](https://pawelqs.github.io/readthis/index.html). + ## Variant Frequency Spectra