Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop #24

Merged
merged 26 commits into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
43f6de2
Update test-fit_powerlaw_tail_optim.R
pawelqs Aug 2, 2023
0eee818
intervalize_mutation_frequencies() as method
pawelqs Aug 2, 2023
9464ce8
bootstrapping v1
pawelqs Aug 3, 2023
40a65a7
verbose_down() implemented
pawelqs Aug 3, 2023
10c25ab
calc_SFS_resamples() implemented and tested
pawelqs Aug 3, 2023
51956b6
bootstrap v2
pawelqs Aug 3, 2023
1bcbdad
incorrect verbosity fixed, missing classes added
pawelqs Aug 3, 2023
19a205d
expected warning in suppressed in test-fit_powerlaw_tail_optim.R
pawelqs Aug 3, 2023
74fbbc1
version up
pawelqs Aug 3, 2023
9ddfcad
calc_powerllaw_model_residuals() works for bootstrapping results
pawelqs Aug 3, 2023
57f18ff
residuals for bootstrap models are calculated (given tha base SFS)
pawelqs Aug 3, 2023
c75fea9
test_data_fitted has bootstrapped models
pawelqs Aug 4, 2023
e522db7
Update stat_SFS.R
pawelqs Aug 4, 2023
9bc6a26
Plot models can plot bootstrapped powerlaw models
pawelqs Aug 4, 2023
92e4ecb
R CMD CHECK fixes
pawelqs Aug 4, 2023
1d9f123
Documentation update
pawelqs Aug 4, 2023
30bc7d6
Update README.md
pawelqs Aug 4, 2023
5d98e08
Update fitting_models.Rmd
pawelqs Aug 4, 2023
8b5149e
verbose message update
pawelqs Aug 4, 2023
dc8f9b1
Update test-fit_powerlaw_tail_optim.R
pawelqs Aug 4, 2023
407dff3
Unused params removed from test-plot_models.R
pawelqs Aug 4, 2023
000a7a8
Update test-plot_models.R
pawelqs Aug 4, 2023
c25a4a9
rsample pkg required in fit_powerlaw_tail_optim.cevo_SFS_bootstraps()
pawelqs Aug 4, 2023
3417c9f
tests for verbose_down() added
pawelqs Aug 4, 2023
1530f0c
update verbose_down()
pawelqs Aug 4, 2023
8779d64
Merge pull request #23 from pawelqs/bootstrapping
pawelqs Aug 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: cevomod
Title: Cancer Evolution Models
Version: 2.1.0
Version: 2.2.0
Authors@R:
person("Paweł", "Kuś", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-4367-9821"))
Expand Down Expand Up @@ -30,7 +30,8 @@ Suggests:
testthat (>= 3.0.0),
tidyverse,
vdiffr,
readthis
readthis,
rsample
Config/testthat/edition: 3
VignetteBuilder: knitr
Imports:
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ S3method(evaluate_MC_runs,matrix)
S3method(filter,cevodata)
S3method(fit_mobster,cevodata)
S3method(fit_powerlaw_tail_fixed,cevodata)
S3method(fit_powerlaw_tail_optim,cevo_SFS_bootstraps)
S3method(fit_powerlaw_tail_optim,cevo_SFS_tbl)
S3method(fit_powerlaw_tail_optim,cevodata)
S3method(get_CNVs_var_names,cevodata)
S3method(get_frequency_measure_name,cevo_snvs)
Expand All @@ -42,6 +44,8 @@ S3method(get_selection_coefficients,cevodata)
S3method(get_selection_coefficients,tbl_df)
S3method(identify_non_neutral_tail_mutations,cevodata)
S3method(identify_non_neutral_tail_mutations,singlepatient_cevodata)
S3method(intervalize_mutation_frequencies,cevo_snvs)
S3method(intervalize_mutation_frequencies,cevodata)
S3method(merge,cevodata)
S3method(plot,cevo_ITH_tbl)
S3method(plot,cevo_MC_solutions_eval)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@

## cevomod 2.2.0
* fit_powerlaw_tail_fixed() has a bootstrap option

## cevomod 2.1.0
* cevomod is integrated with a helper [readthis](https://pawelqs.github.io/readthis/index.html) package, designed for bulk reading of variant files from algorithms such as Mutect2, Strelka, ASCAT, or FACETS, in the cevomod-friendly data format. Objects returned by `readthis::read_*()` functions can be added to the cevodata object using a general `add_data()` function.

Expand Down
151 changes: 142 additions & 9 deletions R/fit_powerlaw_tail_optim.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

#' Fitting Tung Durrett models
#' Fitting power-law tails with aptimum exponent value
#'
#' `fit_powerlaw_tail_optim()` uses `stats::optim` to find optimal A and alpha
#' whch maximizes SFS area under the powerlaw curve (*sampled* region of SFS and
Expand Down Expand Up @@ -35,12 +35,15 @@ fit_powerlaw_tail_optim <- function(object, ...) {

#' @rdname powerlaw_optim
#' @inheritParams get_non_zero_SFS_range
#' @param peak_detection_upper_limit upper f value up to which the main peak is searched
#' @param reward_upper_limit mutations under the curve up to this limit will be rewarded
#' @param peak_detection_upper_limit Upper f value up to which the main peak is searched
#' @param reward_upper_limit Mutations under the curve up to this limit will be rewarded
#' @param bootstraps Number of bootstrap samples, or FALSE to make no resampling.
#' **This option significantly extendis the model fitting time!!**
#' @export
fit_powerlaw_tail_optim.cevodata <- function(object,
name = "powerlaw_optim",
# pct_left = 0, pct_right = 0.98,
bootstraps = FALSE,
allowed_zero_bins = 2,
y_treshold = 1,
y_threshold_pct = 0.01,
Expand All @@ -50,10 +53,143 @@ fit_powerlaw_tail_optim.cevodata <- function(object,
control = list(maxit = 1000, ndeps = c(0.1, 0.01)),
verbose = get_cevomod_verbosity(),
...) {
if (!bootstraps) {
sfs <- get_SFS(object, name = "SFS")
models <- fit_powerlaw_tail_optim(
sfs,
name = name,
allowed_zero_bins = allowed_zero_bins,
y_treshold = y_treshold,
y_threshold_pct = y_threshold_pct,
av_filter = av_filter,
peak_detection_upper_limit = peak_detection_upper_limit,
reward_upper_limit = reward_upper_limit,
control = control,
verbose = verbose
)
object$models[[name]] <- models
object <- calc_powerlaw_model_residuals(object, name)
object$active_models <- name
object
} else {
rlang::check_installed("rsample", reason = "to perform bootstrap sampling of SNVs")
sfs_resamples <- calc_SFS_resamples(object, times = bootstraps, verbose = verbose)

models <- sfs_resamples |>
map(
fit_powerlaw_tail_optim,
name = name,
allowed_zero_bins = allowed_zero_bins,
y_treshold = y_treshold,
y_threshold_pct = y_threshold_pct,
av_filter = av_filter,
peak_detection_upper_limit = peak_detection_upper_limit,
reward_upper_limit = reward_upper_limit,
control = control,
verbose = verbose
)

bootstrap_models <- models |>
map("bootstrap_models") |>
bind_rows()

models <- models |>
map("models") |>
bind_rows(.id = "sample_id")

bootstrap_name <- str_c(name, "_bootstraps")
object$models[[bootstrap_name]] <- bootstrap_models
object <- calc_powerlaw_model_residuals(object, bootstrap_name)
object$models[[name]] <- models
object <- calc_powerlaw_model_residuals(object, name)
object$active_models <- name
object
}
}


#' @rdname powerlaw_optim
#' @export
fit_powerlaw_tail_optim.cevo_SFS_bootstraps <- function(object,
name = "powerlaw_optim",
allowed_zero_bins = 2,
y_treshold = 1,
y_threshold_pct = 0.01,
av_filter = c(1/3, 1/3, 1/3),
peak_detection_upper_limit = 0.3,
reward_upper_limit = 0.4,
control = list(maxit = 1000, ndeps = c(0.1, 0.01)),
verbose = get_cevomod_verbosity(),
...) {
rlang::check_installed("rsample", reason = "to perform bootstrap sampling of SNVs")
msg("Fitting models to ", unique(object$sfs[[1]]$sample_id), " resamples", verbose = verbose)
pb <- if (verbose) progress_bar$new(total = nrow(object)) else NULL

object$models <- object$sfs |>
map(function(sfs) {
if (!is.null(pb)) pb$tick()
fit_powerlaw_tail_optim(
sfs,
name = name,
allowed_zero_bins = allowed_zero_bins,
y_treshold = y_treshold,
y_threshold_pct = y_threshold_pct,
av_filter = av_filter,
peak_detection_upper_limit = peak_detection_upper_limit,
reward_upper_limit = reward_upper_limit,
control = control,
verbose = verbose_down(verbose)
)
})

object$tidy_models <- object$models |>
map(~pivot_longer(.x, all_of(c("A", "alpha")), names_to = "term", values_to = "estimate"))

conf_intervals <- rsample::int_pctl(object, .data$tidy_models)

bootstrap_models <- object$models |>
set_names(object$id) |>
bind_rows(.id = "resample_id")
class(bootstrap_models) <- c("cevo_bootstrap_powerlaw_models", class(bootstrap_models))

models <- conf_intervals |>
pivot_wider(
names_from = "term",
values_from = ".lower":".upper",
names_glue = "{term}{.value}",
names_vary = "slowest"
) |>
transmute(
model = name,
component = "powerlaw tail",
A = .data$A.estimate,
.data$A.lower, .data$A.upper,
alpha = .data$alpha.estimate,
.data$alpha.lower, .data$alpha.upper
)
class(models) <- c("cevo_powerlaw_models", class(models))

lst(bootstrap_models, models)
}


#' @rdname powerlaw_optim
#' @export
fit_powerlaw_tail_optim.cevo_SFS_tbl <- function(object,
name = "powerlaw_optim",
allowed_zero_bins = 2,
y_treshold = 1,
y_threshold_pct = 0.01,
av_filter = c(1/3, 1/3, 1/3),
peak_detection_upper_limit = 0.3,
reward_upper_limit = 0.4,
control = list(maxit = 1000, ndeps = c(0.1, 0.01)),
verbose = get_cevomod_verbosity(),
...) {
msg("Fitting optimized power-law models...", verbose = verbose)
start_time <- Sys.time()

sfs <- get_SFS(object, name = "SFS")
sfs <- object
bounds <- sfs |>
get_non_zero_SFS_range(
allowed_zero_bins = allowed_zero_bins,
Expand Down Expand Up @@ -109,14 +245,11 @@ fit_powerlaw_tail_optim.cevodata <- function(object,
class(models) <- c("cevo_powerlaw_models", class(models))

msg("Models fitted in ", Sys.time() - start_time, " seconds", verbose = verbose)

object$models[[name]] <- models
object <- calc_powerlaw_model_residuals(object, name)
object$active_models <- name
object
models
}



td_optim <- function(init_A, init_alpha, data,
peak_detection_upper_limit = 0.3,
reward_upper_limit = 0.4,
Expand Down
Loading
Loading