diff --git a/regression.analyses.r b/regression.analyses.r index 925c77d..99dde49 100755 --- a/regression.analyses.r +++ b/regression.analyses.r @@ -401,9 +401,9 @@ res.failed <- subset(res.failed, subset = grepl(Factor, pattern = "(^#.*|^Too (m dep.var.cats <- sub(dep.vars, pattern = "^.*:", replacement = "") dep.var.conts <- sub(grep(dep.vars, pattern = ".*:.*", value = T), pattern = ":.*$", replacement = "") -# df <- df[df %>% dplyr::select(all_of(c(dep.var.conts, grep(dep.vars, pattern = ":", value = T, invert = T)))) %>% is.na %>% rowSums() == 0, , drop = F] if(all(sub.vars != "ALL_SAMPLES")) { + df <- df[df %>% dplyr::select(all_of(sub.vars)) %>% is.na %>% rowSums == 0, , drop = F] df <- df %>% mutate(across(.cols = c(dep.var.cats, sub.vars), .fns = as.factor)) N <- with(df, aggregate(df %>% dplyr::select(dep.var.cats, dep.var.conts, other.factors), @@ -422,12 +422,12 @@ if(all(sub.vars != "ALL_SAMPLES")) { by = sapply(sub.vars, FUN = function(x) {get(x)}, simplify = F), FUN = function(y) {if(is.factor(y)) {table(y, useNA = "always")} else {summary_f(y)}})) %>% as.matrix() %>% as.data.frame(stringsAsFactors = F) %>% mutate(across(.cols = !sub.vars, .fns = as.numeric)) - valid.cols <- foreach(i = colnames(df.summary), .combine = c) %do% { - if(i %in% sub.vars) {i} else - if(is.na(sum(df.summary[i]))) {i} else - if(str_contains(x = i, pattern = sub(dep.var.conts, pattern = "(^.*$)", replacement = "\\1\\."), logic = "OR")) {i} else - if(sum(df.summary[i]) >0) {i} - } +valid.cols <- foreach(i = colnames(df.summary), .combine = c) %do% { + if(!is.numeric(df.summary[,i])) {i} else + if(!grepl(i, pattern = "\\.NA('s)?$")) {i} else + if(sum(df.summary[,i]) >0) {i} + } + df.summary <- df.summary[valid.cols] df.summary.fin <- cbind(df.summary, N)