-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
100 changed files
with
28,335 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
154 changes: 154 additions & 0 deletions
154
arachne-ee/TestRareBloodCancersPrevalence/cdm_from_environment.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
|
||
|
||
cdm_from_environment <- function(write_prefix = "") { | ||
|
||
connection_string <- Sys.getenv("CONNECTION_STRING") | ||
|
||
if (connection_string == "") { | ||
stop("CONNECTION_STRING environment variable not set.") | ||
} | ||
|
||
parts <- stringr::str_split(connection_string, ":")[[1]] | ||
port_db <- stringr::str_split(parts[4], "/")[[1]] | ||
|
||
# required args that need to be passed in | ||
dbms <- Sys.getenv("DBMS_TYPE") | ||
server <- stringr::str_remove(parts[3], "^//+") | ||
port <- port_db[1] | ||
dbname <- port_db[2] | ||
username <- Sys.getenv("DBMS_USERNAME") | ||
password <- Sys.getenv("DBMS_PASSWORD") | ||
cdm_schema <- Sys.getenv("DBMS_SCHEMA") | ||
write_schema <- Sys.getenv("RESULT_SCHEMA") | ||
|
||
|
||
vars <- c(dbms, server, port, dbname, password, cdm_schema, write_schema) | ||
names_vars <- c("dbms", "server", "port", "dbname", "password", "cdm_schema", "write_schema") | ||
|
||
for (i in seq_along(vars)) { | ||
if (nchar(vars[i]) < 1) stop(paste(names_vars[i], "is not required but not available!")) | ||
} | ||
|
||
supported_db <- c("postgresql", "sql server", "redshift", "duckdb", "snowflake") | ||
|
||
if (!(dbms %in% supported_db)) { | ||
cli::cli_abort("The environment variable DBMS_TYPE must be on one of {paste(supported_db, collapse = ', ')} not `{Sys.getenv('DBMS_TYPE')}`.") | ||
} | ||
|
||
if (dbms == "duckdb") { | ||
db <- dbname | ||
if (db == "") { | ||
db <- "GiBleed" | ||
} | ||
|
||
checkmate::assert_choice(db, CDMConnector::example_datasets()) | ||
con <- DBI::dbConnect(duckdb::duckdb(), CDMConnector::eunomia_dir(db)) | ||
cdm <- CDMConnector::cdm_from_con(con, "main", "main", cdm_version = "5.3", cdm_name = db) | ||
return(cdm) | ||
} | ||
|
||
cdm_schema <- stringr::str_split(cdm_schema, "\\.")[[1]] | ||
write_schema <- stringr::str_split(write_schema, "\\.")[[1]] | ||
|
||
|
||
print(write_schema) | ||
print(sapply(write_schema, nchar)) | ||
|
||
if (dbms %in% c("postgresql", "redshift")) { | ||
|
||
drv <- switch (dbms, | ||
"postgresql" = RPostgres::Postgres(), | ||
"redshift" = RPostgres::Redshift() | ||
) | ||
|
||
con <- DBI::dbConnect(drv = drv, | ||
dbname = dbname, | ||
host = server, | ||
user = username, | ||
password = password, | ||
port = port) | ||
|
||
if (!DBI::dbIsValid(con)) { | ||
cli::cli_abort("Database connection failed!") | ||
} | ||
|
||
} else if (dbms == "sql server") { | ||
|
||
con <- DBI::dbConnect(odbc::odbc(), | ||
Driver = "ODBC Driver 17 for SQL Server", | ||
Server = server, | ||
Database = dbname, | ||
UID = username, | ||
PWD = password, | ||
TrustServerCertificate="yes", | ||
Port = port) | ||
|
||
if (!DBI::dbIsValid(con)) { | ||
cli::cli_abort("Database connection failed!") | ||
} | ||
|
||
|
||
} else if (dbms == "snowflake") { | ||
con <- DBI::dbConnect(odbc::odbc(), | ||
DRIVER = "SnowflakeDSIIDriver", | ||
SERVER = server, | ||
DATABASE = dbname, | ||
UID = username, | ||
PWD = password, | ||
WAREHOUSE = "COMPUTE_WH_XS") # don't hardcode this | ||
|
||
if (!DBI::dbIsValid(con)) { | ||
cli::cli_abort("Database connection failed!") | ||
} | ||
|
||
} else { | ||
cli::cli_abort("{dbms} is not a supported database type!") | ||
} | ||
|
||
# split schemas. If write schema has a dot we need to interpret it as catalog.schema | ||
# cdm schema should not have a dot | ||
|
||
# if (stringr::str_detect(Sys.getenv("WRITE_SCHEMA"), "\\.")) { | ||
# write_schema <- stringr::str_split(write_schema, "\\.")[[1]] | ||
# if (length(write_schema) != 2) { | ||
# cli::cli_abort("write_schema can have at most one period (.)!") | ||
# } | ||
# | ||
# stopifnot(nchar(write_schema[1]) > 0, nchar(write_schema[2]) > 0) | ||
# write_schema <- c(catalog = write_schema[1], schema = write_schema[2]) | ||
# } else { | ||
# write_schema <- c(schema = Sys.getenv("WRITE_SCHEMA")) | ||
# } | ||
# | ||
# if (write_prefix != "") { | ||
# if (dbms != "snowflake") { | ||
# write_schema <- c(write_schema, prefix = write_prefix) | ||
# } | ||
# } | ||
|
||
# add prefix | ||
if (write_prefix != "") { | ||
if (length(write_schema) == 1) { | ||
write_schema <- c(schema = write_schema, prefix = write_prefix) | ||
} else if (length(write_schema) == 2) { | ||
write_schema <- c(catalog = write_schema[1], | ||
schema = write_schema[2], | ||
prefix = write_prefix) | ||
} | ||
} else if (length(write_schema) == 2) { | ||
write_schema <- c(catalog = write_schema[1], schema = write_schema[2]) | ||
} | ||
|
||
cdm <- CDMConnector::cdm_from_con( | ||
con = con, | ||
cdm_schema = cdm_schema, | ||
write_schema = write_schema, | ||
cdm_version = "5.3", | ||
cdm_name = Sys.getenv("DATA_SOURCE_NAME", unset = "unnamed_cdm")) | ||
|
||
if (length(names(cdm)) == 0) { | ||
cli::cli_abort("CDM object creation failed!") | ||
} | ||
|
||
return(cdm) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
library(CDMConnector) | ||
library(IncidencePrevalence) | ||
library(dplyr) | ||
library(log4r) | ||
setwd(file.path(getwd(), "TestRareBloodCancersPrevalence")) | ||
print(getwd()) | ||
print(list.files()) | ||
# source("arachne-ee/TestRareBloodCancersPrevalence/cdm_from_environment.R") | ||
source("cdm_from_environment.R") | ||
|
||
# database metadata and connection details ----- | ||
# The name/ acronym for the database | ||
db_name <- Sys.getenv("DATA_SOURCE_NAME") %>% | ||
tolower() %>% | ||
stringr::str_replace_all(" ", "_") %>% | ||
stringr::str_replace_all("\\(|\\)", "") | ||
|
||
# Set output folder location ----- | ||
# the path to a folder where the results from this analysis will be saved | ||
# output_folder <- here::here("results") # does not work with execution engine | ||
output_folder <- "/results" | ||
|
||
# create cdm reference ---- | ||
# con <- DBI::dbConnect(duckdb::duckdb(), eunomia_dir()) | ||
# cdm <- cdm_from_con(con, "main", "main") | ||
|
||
# cdm <- cdm_from_environment(write_prefix = "dw_") | ||
prefix <- paste0("tmp", as.integer(Sys.time()) %% 1000, "_") | ||
cdm <- cdm_from_environment(prefix) | ||
|
||
# check database connection | ||
# running the next line should give you a count of your person table | ||
n <- cdm$person %>% | ||
tally() %>% | ||
pull(n) | ||
|
||
print(paste(n, "persons in the CDM person table")) | ||
|
||
print(paste("Database connection class:", class(attr(cdm, "dbcon")))) | ||
|
||
# create directory if it does not already exist ---- | ||
if (!file.exists(output_folder)) { | ||
dir.create(output_folder, recursive = TRUE) | ||
} | ||
|
||
# start log ---- | ||
log_file <- paste0(output_folder, "/log.txt") | ||
logger <- create.logger() | ||
logfile(logger) <- log_file | ||
level(logger) <- "INFO" | ||
|
||
# tables --- | ||
table_outcome <-"outcome" | ||
table_dpop_sex <- "dpop_sex" | ||
table_ph <- "dpop_ph" | ||
table_age <- "dpop_age" | ||
table_point_prev <- "point_prev" | ||
table_period_prev <- "period_prev" | ||
|
||
# instantiate outcome cohorts ---- | ||
info(logger, "INSTANTIATE OUTCOME COHORTS") | ||
|
||
outcome_cohorts <- readCohortSet(here::here("outcomeCohorts")) %>% | ||
slice(1:2) # for testing | ||
|
||
info(logger, "- getting outcomes") | ||
|
||
cdm <- generateCohortSet(cdm, | ||
outcome_cohorts, | ||
name = table_outcome, | ||
overwrite = TRUE) | ||
|
||
total_subjects <- cohort_count(cdm[[table_outcome]]) %>% | ||
summarise(total = sum(number_subjects)) %>% | ||
pull(total) | ||
|
||
print(paste("total subjects with outcomes:", total_subjects)) | ||
|
||
# get denominator cohorts ----- | ||
info(logger, "GETTING DENOMINATOR COHORTS") | ||
info(logger, "- getting denominator - primary and sex") | ||
|
||
cdm <- generateDenominatorCohortSet( | ||
cdm = cdm, | ||
name = table_dpop_sex, | ||
cohortDateRange = c(as.Date("2010-01-01"), as.Date("2024-01-01")), | ||
sex = c("Male", "Female", "Both"), | ||
daysPriorObservation = 365, | ||
overwrite = TRUE | ||
) | ||
|
||
info(logger, "- getting denominator - prior history") | ||
|
||
cdm <- generateDenominatorCohortSet( | ||
cdm = cdm, | ||
name = table_ph, | ||
cohortDateRange = c(as.Date("2010-01-01"), as.Date("2024-01-01")), | ||
daysPriorObservation = c(0, 1095), | ||
overwrite = TRUE | ||
) | ||
|
||
info(logger, "- getting denominator - age_gr") | ||
|
||
cdm <- generateDenominatorCohortSet( | ||
cdm = cdm, | ||
name = table_age, | ||
cohortDateRange = c(as.Date("2010-01-01"), as.Date("2024-01-01")), | ||
ageGroup = list( | ||
# age_gr_1 | ||
c(0, 9), c(10, 19), c(20, 29), c(30, 39), c(40, 49), | ||
c(50, 59), c(60, 69), c(70, 79), c(80, 89), | ||
c(90, 99), c(100, 150), | ||
# age_gr_2 | ||
c(0, 44), c(45, 64), c(65, 150) | ||
), | ||
daysPriorObservation = 365, | ||
overwrite = TRUE | ||
) | ||
|
||
# estimate prevalence ----- | ||
denominators <- c( | ||
table_dpop_sex, | ||
table_ph, | ||
table_age | ||
) | ||
|
||
prevalence_estimates <- list() | ||
|
||
for (i in seq_along(denominators)) { | ||
|
||
info(logger, paste0("- getting point prevalence for ", denominators[i])) | ||
# debugonce(estimatePointPrevalence) | ||
prevalence_estimates[[paste0("point_prevalence_", denominators[[i]])]] <- estimatePointPrevalence( | ||
cdm = cdm, | ||
denominatorTable = denominators[i], | ||
outcomeTable = table_outcome, | ||
interval = "years", | ||
temporary = FALSE | ||
) | ||
|
||
info(logger, paste0("- getting period prevalence for ", denominators[i])) | ||
|
||
|
||
prevalence_estimates[[paste0("period_prevalence_", denominators[[i]])]] <- estimatePeriodPrevalence( | ||
cdm = cdm, | ||
denominatorTable = denominators[i], | ||
outcomeTable = table_outcome, | ||
completeDatabaseIntervals = TRUE, | ||
fullContribution = c(TRUE, FALSE), | ||
interval = "years", | ||
temporary = FALSE | ||
) | ||
} | ||
|
||
# gather results and export ----- | ||
info(logger, "ZIPPING RESULTS") | ||
exportIncidencePrevalenceResults( | ||
result = prevalence_estimates, | ||
zipName = paste0(c(db_name, "C1_001_Results", format(Sys.Date(), format="%Y%m%d")), collapse = "_"), | ||
outputFolder = output_folder | ||
) | ||
|
||
cdm_disconnect(cdm) | ||
|
||
print("-- Thank you for running the study!") | ||
print("-- If all has worked, there should now be a zip folder with your results in the output folder to share") | ||
|
||
|
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
|
||
|
||
testnames <- list.files(here::here("arachne-ee")) | ||
|
||
for (i in seq_along(testnames)) { | ||
message(paste("writing ", testnames[i])) | ||
withr::with_dir(here::here("arachne-ee"), { | ||
filenames <- list.files(testnames[i], full.names = T) | ||
zip(zipfile = paste0("../arachne-ee-zip/", testnames[i]), files = filenames) | ||
}) | ||
} | ||
|
||
|
||
|
||
|
||
testnames <- list.files(here::here("darwin-ee")) | ||
|
||
for (i in seq_along(testnames)) { | ||
message(paste("writing ", testnames[i])) | ||
withr::with_dir(here::here("arachne-ee"), { | ||
filenames <- list.files(testnames[i], full.names = T) | ||
zip(zipfile = paste0("../darwin-ee-zip/", testnames[i]), files = filenames) | ||
}) | ||
} |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
.Rproj.user | ||
.Rhistory | ||
.RData | ||
.Ruserdata | ||
postgresql-42.2.18.jar | ||
CodeToRun_local.R | ||
Results/CPRD GOLD/* | ||
*.zip | ||
*log.txt | ||
.Rprofile |
Oops, something went wrong.