Skip to content

Commit

Permalink
Added strata option
Browse files Browse the repository at this point in the history
  • Loading branch information
WillNickols committed Jul 27, 2024
1 parent cc1ac91 commit 0ef1c3c
Show file tree
Hide file tree
Showing 11 changed files with 204 additions and 62 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Description: MaAsLin 3 refines and extends generalized multivariate linear model
License: MIT + file LICENSE
Imports: dplyr, pbapply, lmerTest, parallel, lme4, optparse, logging,
data.table, multcomp, ggplot2, RColorBrewer, patchwork, scales,
rlang, hash, matrixStats, tibble, ggnewscale, kableExtra
rlang, hash, matrixStats, tibble, ggnewscale, kableExtra, survival
Suggests: knitr, testthat (>= 2.1.0), rmarkdown, markdown
VignetteBuilder: knitr
Collate: fit.R utility_scripts.R viz.R maaslin3.R
Expand Down
195 changes: 152 additions & 43 deletions R/fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ get_fixed_effects <- function(formula, random_effects_formula, dat_sub, groups,
} else { # Random effects
patterns <- paste0("(", unlist(lme4::findbars(formula(gsub("^expr ", "", safe_deparse(formula))))), ")")

# Remove all the random effects from the formula
for (pattern in patterns) {
fixed_effects_only <- gsub(pattern, "",
paste0(trimws(safe_deparse(formula(gsub("^expr ", "", safe_deparse(formula))))), collapse = " "),
Expand Down Expand Up @@ -193,6 +194,24 @@ fit.model <- function(
formula <- extract_special_predictor_out[[1]]
ordereds <- extract_special_predictor_out[[2]]

# Extract strata
extract_special_predictor_out <- extract_special_predictor(formula, 'strata')
formula <- extract_special_predictor_out[[1]]
strata <- extract_special_predictor_out[[2]]

if (length(strata) > 1) {
stop("Only one strata allowed. Please change the formula.")
}

if (length(strata) > 0 & !is.null(random_effects_formula)) {
stop("Strata and random effects cannot be combined. Please only use random effects if you have multiple grouping categories.")
}

if (length(strata) > 0 & model == 'LM') {
formula <- formula(paste0(safe_deparse(formula), ' + (1 | ', strata, ')'))
random_effects_formula <- formula
}

#############################################################
# Determine the function and summary for the model selected #
#############################################################
Expand Down Expand Up @@ -278,51 +297,140 @@ fit.model <- function(
##################

if (model == "logistic") {
if (is.null(random_effects_formula)) { # Fixed effects only
if (augment) {
model_function <- function(formula, mm, weight_scheme, na.action) {
assign("weight_sch_current", weight_scheme, envir = environment(formula))
if (is.null(random_effects_formula)) {
if (length(strata) > 0) {
if (augment) {
model_function <-
function(formula, mm, weight_scheme, na.action) {
formula <- formula(paste0(safe_deparse(formula), ' + strata(', strata, ')'))

assign("weight_sch_current", weight_scheme, envir = environment(formula))

clogit_out <- tryCatch({
fit1 <- survival::clogit(
formula(formula),
data = mm,
method = "breslow",
control = coxph.control(iter.max = 1000),
na.action = na.action,
weights = weight_sch_current,
robust = F) # Robust SE seem to be worse with weighting...
}, warning = function(w) {
'warning'
}, error = function(e) {
'error'
})

if (is.character(clogit_out)) {
fit1 <- survival::clogit(
formula(formula),
data = mm,
method = "breslow",
control = coxph.control(iter.max = 1000),
na.action = na.action,
weights = weight_sch_current,
robust = F) # Robust SE seem to be worse with weighting...
return(fit1)
} else {
return(clogit_out)
}
}
} else {
model_function <-
function(formula, data, na.action) {
clogit_out <- tryCatch({
fit1 <- survival::clogit(
formula(formula),
data = data,
method = "breslow",
control = coxph.control(iter.max = 1000),
na.action = na.action,
robust = F) # Robust SE seem to be worse with weighting...)
}, warning = function(w) {
'warning'
}, error = function(e) {
'error'
})

if (is.character(clogit_out)) {
fit1 <- survival::clogit(
formula(formula),
data = data,
method = "breslow",
control = coxph.control(iter.max = 1000),
na.action = na.action,
robust = F) # Robust SE seem to be worse with weighting...)
return(fit1)
} else {
return(clogit_out)
}
}
}
summary_function <- function(fit, names_to_include) {
lm_summary <- coef(summary(fit))

store_names <- gsub('`', '', rownames(lm_summary))
if (!all(names_to_include %in% store_names)) { # If deficient rank, make sure all rownames are included
rows_to_add = names_to_include[!(names_to_include %in% store_names)]
lm_summary <- rbind(lm_summary, matrix(rep(NaN, ncol(lm_summary) * length(rows_to_add)), nrow=length(rows_to_add)))
rownames(lm_summary) <- c(store_names, rows_to_add)
}

glm_out <- glm(
formula = formula(formula),
family = 'binomial',
data = mm,
weights = weight_sch_current,
na.action = na.action,
)
if ('robust se' %in% colnames(lm_summary)) {
para <- as.data.frame(lm_summary)[,-c(2,4,5)] # Don't actually use robust SE
} else {
para <- as.data.frame(lm_summary)[,-c(2,4)]
}

return(glm_out)
para$name <- rownames(lm_summary)
return(para)
}
} else {
model_function <-
function(formula, data, na.action) {
return(glm(
formula(formula),
data = data,
} else { # Fixed effects only
if (augment) {
model_function <- function(formula, mm, weight_scheme, na.action) {
assign("weight_sch_current", weight_scheme, envir = environment(formula))

glm_out <- glm(
formula = formula(formula),
family = 'binomial',
data = mm,
weights = weight_sch_current,
na.action = na.action,
))
)

return(glm_out)
}
}
summary_function <- function(fit, names_to_include) {
lm_summary <- summary(fit)$coefficients
store_names <- gsub('`', '', rownames(lm_summary))
if (!all(names_to_include %in% store_names)) { # If deficient rank, make sure all rownames are included
rows_to_add = names_to_include[!(names_to_include %in% store_names)]
lm_summary <- rbind(lm_summary, matrix(rep(NaN, 4 * length(rows_to_add)), nrow=length(rows_to_add)))
rownames(lm_summary) <- c(store_names, rows_to_add)
} else {
model_function <-
function(formula, data, na.action) {
return(glm(
formula(formula),
data = data,
family = 'binomial',
na.action = na.action,
))
}
}
summary_function <- function(fit, names_to_include) {
lm_summary <- summary(fit)$coefficients
store_names <- gsub('`', '', rownames(lm_summary))
if (!all(names_to_include %in% store_names)) { # If deficient rank, make sure all rownames are included
rows_to_add = names_to_include[!(names_to_include %in% store_names)]
lm_summary <- rbind(lm_summary, matrix(rep(NaN, 4 * length(rows_to_add)), nrow=length(rows_to_add)))
rownames(lm_summary) <- c(store_names, rows_to_add)
}
para <- as.data.frame(lm_summary)[-1, -3]
para$name <- rownames(lm_summary)[-1]
return(para)
}
para <- as.data.frame(lm_summary)[-1, -3]
para$name <- rownames(lm_summary)[-1]
return(para)
}
} else { # Random effects
ranef_function <- lme4::ranef
if (augment) {
model_function <-
function(formula, mm, weight_scheme, na.action) {
assign("weight_sch_current", weight_scheme, envir = environment(formula))

index <- 1

while (index < length(optimizers)) {
Expand All @@ -335,7 +443,7 @@ fit.model <- function(
na.action = na.action,
weights = weight_sch_current,
control = lme4::glmerControl(optimizer = optimizers[index],
optCtrl = optCtrlList[[index]]))
optCtrl = optCtrlList[[index]]))
}, warning=function(w) {
if (w$message == "non-integer #successes in a binomial glm!") {
# Still worked
Expand All @@ -358,14 +466,14 @@ fit.model <- function(

if (is.character(glm_out)) {
withCallingHandlers({ # Catch non-integer # successes first
fit1 <- lme4::glmer(
formula(formula),
data = mm,
family = 'binomial',
na.action = na.action,
weights = weight_sch_current,
control = lme4::glmerControl(optimizer = optimizers[index],
optCtrl = optCtrlList[[index]]))
fit1 <- lme4::glmer(
formula(formula),
data = mm,
family = 'binomial',
na.action = na.action,
weights = weight_sch_current,
control = lme4::glmerControl(optimizer = optimizers[index],
optCtrl = optCtrlList[[index]]))
}, warning=function(w) {
if (w$message == "non-integer #successes in a binomial glm!") {
# Still worked
Expand All @@ -390,7 +498,7 @@ fit.model <- function(
family = 'binomial',
na.action = na.action,
control = lme4::glmerControl(optimizer = optimizers[index],
optCtrl = optCtrlList[[index]]))
optCtrl = optCtrlList[[index]]))
}, warning = function(w) {
'warning'
}, error = function(e) {
Expand All @@ -412,7 +520,7 @@ fit.model <- function(
family = 'binomial',
na.action = na.action,
control = lme4::glmerControl(optimizer = optimizers[index],
optCtrl = optCtrlList[[index]])))
optCtrl = optCtrlList[[index]])))
} else {
return(glm_out)
}
Expand Down Expand Up @@ -457,7 +565,8 @@ fit.model <- function(
'lmerTest',
'parallel',
'lme4',
'multcomp'
'multcomp',
'survival'
)) {
suppressPackageStartupMessages(require(lib, character.only = TRUE))
}
Expand Down
36 changes: 29 additions & 7 deletions R/maaslin3.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ args$formula <- NULL
args$random_effects <- NULL
args$group_effects <- NULL
args$ordered_effects <- NULL
args$strata_effects <- NULL
args$fixed_effects <- NULL
args$feature_specific_covariate <- NULL
args$feature_specific_covariate_name <- NULL
Expand Down Expand Up @@ -188,6 +189,18 @@ options <-
"[ Default: none ]"
)
)
options <-
optparse::add_option(
options,
c("--strata_effects"),
type = "character",
dest = "strata_effects",
default = args$ordered_effects,
help = paste("The strata effects for the model.",
"Only one is allowed.",
"[ Default: none ]"
)
)
options <-
optparse::add_option(
options,
Expand Down Expand Up @@ -510,6 +523,7 @@ maaslin_parse_param_list <- function(param_list) {
fixed_effects = args$fixed_effects,
group_effects = args$group_effects,
ordered_effects = args$ordered_effects,
strata_effects = args$strata_effects,
feature_specific_covariate = args$feature_specific_covariate,
feature_specific_covariate_name = args$feature_specific_covariate_name,
feature_specific_covariate_record = args$feature_specific_covariate_record,
Expand Down Expand Up @@ -684,6 +698,7 @@ maaslin_log_arguments <- function(param_list) {
logging::logdebug("Fixed effects: %s", param_list[["fixed_effects"]])
logging::logdebug("Group effects: %s", param_list[["group_effects"]])
logging::logdebug("Ordered effects: %s", param_list[["ordered_effects"]])
logging::logdebug("Strata effects: %s", param_list[["strata_effects"]])
logging::logdebug("Formula: %s", param_list[["formula"]])
logging::logdebug("Correction method: %s", param_list[["correction"]])
logging::logdebug("Standardize: %s", param_list[["standardize"]])
Expand Down Expand Up @@ -1079,17 +1094,19 @@ maaslin_compute_formula <- function(params_and_data) {
fixed_effects <- param_list[["fixed_effects"]]
group_effects <- param_list[["group_effects"]]
ordered_effects <- param_list[["ordered_effects"]]
strata_effects <- param_list[["strata_effects"]]
random_effects <- param_list[["random_effects"]]
feature_specific_covariate_name <- param_list[['feature_specific_covariate_name']]

if (!is.null(param_list[["formula"]])) {
if (!is.null(param_list[["fixed_effects"]]) |
!is.null(param_list[["random_effects"]]) |
!is.null(param_list[["group_effects"]]) |
!is.null(param_list[["ordered_effects"]])) {
!is.null(param_list[["ordered_effects"]]) |
!is.null(param_list[["strata_effects"]])) {
logging::logwarn(
paste("fixed_effects, random_effects, group_effects, or ordered_effects provided in addition to formula,",
"using just the fixed_effects, random_effects, group_effects, and ordered_effects"))
paste("fixed_effects, random_effects, group_effects, ordered_effects, or strata_effects provided in addition to formula,",
"using just the fixed_effects, random_effects, group_effects, ordered_effects, and strata_effects"))
} else {
logging::logwarn(
paste("maaslin_compute_formula called even though a formula is provided",
Expand Down Expand Up @@ -1202,7 +1219,7 @@ maaslin_compute_formula <- function(params_and_data) {
}

# reduce metadata to only include fixed/group/random effects in formula
effects_names <- unique(c(fixed_effects, random_effects, group_effects, ordered_effects))
effects_names <- unique(c(fixed_effects, random_effects, group_effects, ordered_effects, strata_effects))
metadata <- metadata[, effects_names, drop = FALSE]

# create the fixed effects formula text
Expand All @@ -1213,6 +1230,9 @@ maaslin_compute_formula <- function(params_and_data) {
if (length(ordered_effects) > 0) {
formula_effects <- union(formula_effects, paste0("ordered(", ordered_effects, ")"))
}
if (length(strata_effects) > 0) {
formula_effects <- union(formula_effects, paste0("strata(", strata_effects, ")"))
}
if (!is.null(feature_specific_covariate_name)) {
formula_effects <- union(formula_effects, feature_specific_covariate_name)
}
Expand Down Expand Up @@ -1274,9 +1294,10 @@ maaslin_check_formula <- function(params_and_data) {
if (!is.null(param_list[["fixed_effects"]]) |
!is.null(param_list[["random_effects"]]) |
!is.null(param_list[["group_effects"]]) |
!is.null(param_list[["ordered_effects"]])) {
!is.null(param_list[["ordered_effects"]]) |
!is.null(param_list[["strata_effects"]])) {
logging::logwarn(
paste("fixed_effects, random_effects, group_effects, or ordered_effects provided in addition to formula,",
paste("fixed_effects, random_effects, group_effects, ordered_effects, or strata_effects provided in addition to formula,",
"using only formula"))
}

Expand Down Expand Up @@ -1319,7 +1340,7 @@ maaslin_check_formula <- function(params_and_data) {

term_labels <- attr(terms(formula), "term.labels")

if (sum(!grepl("\\|", term_labels)) == 0 & is.null(feature_specific_covariate_name)) {
if (sum(!grepl("strata\\(|\\|", term_labels)) == 0 & is.null(feature_specific_covariate_name)) {
logging::logerror("No fixed, group, or ordered effects included in formula.")
stop()
}
Expand Down Expand Up @@ -2056,6 +2077,7 @@ if (identical(environment(), globalenv()) &&
fixed_effects = current_args$fixed_effects,
group_effects = current_args$group_effects,
ordered_effects = current_args$ordered_effects,
strata_effects = current_args$strata_effects,
feature_specific_covariate = current_args$feature_specific_covariate,
feature_specific_covariate_name = current_args$feature_specific_covariate_name,
feature_specific_covariate_record = current_args$feature_specific_covariate_record,
Expand Down
Loading

0 comments on commit 0ef1c3c

Please sign in to comment.