From 602c31e75109bef909a2bbaf7324b08a6b64b49c Mon Sep 17 00:00:00 2001 From: Marta Alcalde-Herraiz Date: Fri, 26 Jan 2024 16:56:06 +0000 Subject: [PATCH] Update CDMConnector() --- .Rhistory | 856 +++++++++++++++++------------------ R/generateMatchedCohortSet.R | 8 +- 2 files changed, 432 insertions(+), 432 deletions(-) diff --git a/.Rhistory b/.Rhistory index 1d3b7143..a1257944 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,46 +1,340 @@ +CDMConnector::cohort_attrition(cdm$matched_cohort1) %>% filter(cohort_definition_id == 1) +# Matched cohort +CDMConnector::cohort_attrition(cdm$matched_cohort1) %>% filter(cohort_definition_id == 4) +cdm <- generateMatchedCohortSet(cdm = cdm, +name = "matched_cohort1", +targetCohortName = "cohort1", +targetCohortId = 1, +ratio = 0) +CDMConnector::cohort_count(cdm$matched_cohort1) +cdm$matched_cohort1 +cohort_attrition(cdm$matched_cohort1) +CDMConnector::cohort_attrition(cdm$matched_cohort1) +cdm <- generateMatchedCohortSet(cdm = cdm, +name = "matched_cohort1", +targetCohortName = "cohort1", +targetCohortId = 1, +ratio = -1) +CDMConnector::cohort_attrition(cdm$matched_cohort1) +cdm <- generateMatchedCohortSet(cdm = cdm, +name = "matched_cohort2", +targetCohortName = "cohort1", +targetCohortId = c(1,3), +ratio = 10) +CDMConnector::cohort_count(cdm$matched_cohort2) +CDMConnector::cohort_count(cdm$matched_cohort2) +CDMConnector::cohort_set(cdm$matched_cohort2) +cdm <- generateMatchedCohortSet(cdm = cdm, +name = "matched_cohort2", +targetCohortName = "cohort1", +targetCohortId = c(1,3), +ratio = 10) +CDMConnector::cohort_set(cdm$matched_cohort2) %>% filter(cohort_definition_id %in% c(1,4)) +CDMConnector::cohort_set(cdm$matched_cohort2) %>% filter(cohort_definition_id %in% c(3,6)) +cdm <- generateMatchedCohortSet(cdm = cdm, +name = "matched_cohort2", +targetCohortName = "cohort1", +targetCohortId = 1, +ratio = Inf) +devtools::load_all() +devtools::check() +cdm +cdm %>% generateMatchedCohortSet(name = "matched_cohort_nosequant",targetCohortName = "cohort1",targetCohortId = c(1)) +cdm <- cdm %>% generateMatchedCohortSet(name = "matched_cohort_nosequant",targetCohortName = "cohort1",targetCohortId = c(1)) +cdm$matched_cohort_nosequant +name <- "matched_cohort_nosequant" +cdm[[name]] %>% +CDMConnector::cohort_set() +cdm <- cdm %>% generateMatchedCohortSet(name = "matched_cohort_nosequant",targetCohortName = "cohort1",targetCohortId = c(1,3)) +cdm[[name]] %>% +CDMConnector::cohort_set() +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::arrange("cohort_name") +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::cohort_definition_id +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id = target_cohort_definition_id) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id = target_cohort__id) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id = target_cohort_id) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id = target_cohort_id) %>% +arrange(cohort_definition_id) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id = target_cohort_id) %>% +arrange(cohort_definition_id) %>% +dplyr::mutate(cohort_definition_id = dplyr::n()) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id = target_cohort_id) %>% +arrange(cohort_definition_id) %>% +dplyr::mutate(cohort_definition_id = dplyr::row_n()) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id = target_cohort_id) %>% +arrange(cohort_definition_id) %>% +dplyr::mutate(cohort_definition_id = dplyr::row_number()) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id_new = target_cohort_id) %>% +arrange(cohort_definition_id_new) %>% +dplyr::mutate(cohort_definition_id_new = dplyr::row_number()) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id_new = target_cohort_id) %>% +arrange(cohort_definition_id_new, cohort_name) %>% +dplyr::mutate(cohort_definition_id_new = dplyr::row_number()) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id_new = target_cohort_id) +cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id_new = target_cohort_id) %>% +arrange(cohort_definition_id_new) %>% +dplyr::mutate(cohort_definition_id_new = dplyr::row_number()) +cdm[[name]] %>% +CDMConnector::cohort_attrition() +cdm[[name]] %>% +CDMConnector::cohort_attrition() %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new") +) +new_cohort_set <- cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id_new = target_cohort_id) %>% +arrange(cohort_definition_id_new) %>% +dplyr::mutate(cohort_definition_id_new = dplyr::row_number()) +cdm[[name]] %>% +CDMConnector::cohort_attrition() %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new") +) +CDMConnector::cohort_count() +CDMConnector::cohort_count(cdm$matched_cohort_nosequant) +cdm[[name]] %>% +CDMConnector::cohort_count() %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new") +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") +cdm[[name]] %>% +CDMConnector::cohort_count() %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new") +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +cdm[[name]] +cdm[[name]] %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new") +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +cdm[[name]] %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new") +) +new_cohort_set +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new") +cdm[[name]] %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"), +by = "cohort_definition_id" +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new") +cdm[[name]] +cdm[[name]] %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"), +by = "cohort_definition_id", +copy = TRUE +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +cdm[[name]] <- cdm[[name]] %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"), +by = "cohort_definition_id", +copy = TRUE +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +new_cohort_set +cdm[[name]] <- CDMConnector::new_generated_cohort_set( +cohort_ref = new_cohort, +cohort_attrition_ref = new_cohort_attrition , +cohort_set_ref = new_cohort_set, +cohort_count_ref = new_cohort_count, +overwrite = TRUE) +new_cohort_set <- cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id_new = target_cohort_id) %>% +arrange(cohort_definition_id_new) %>% +dplyr::mutate(cohort_definition_id_new = dplyr::row_number()) +new_cohort_attrition <- cdm[[name]] %>% +CDMConnector::cohort_attrition() %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"), +by = "cohort_definition_id" +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +new_cohort_count <- cdm[[name]] %>% +CDMConnector::cohort_count() %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"), +by = "cohort_definition_id" +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +new_cohort <- cdm[[name]] %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"), +by = "cohort_definition_id", +copy = TRUE +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +new_cohort_set <- new_cohort_set %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +cdm[[name]] <- CDMConnector::new_generated_cohort_set( +cohort_ref = new_cohort, +cohort_attrition_ref = new_cohort_attrition , +cohort_set_ref = new_cohort_set, +cohort_count_ref = new_cohort_count, +overwrite = TRUE) +new_cohort_set <- cdm[[name]] %>% +CDMConnector::cohort_set() %>% +dplyr::mutate(cohort_definition_id_new = target_cohort_id) %>% +arrange(cohort_definition_id_new) %>% +dplyr::mutate(cohort_definition_id_new = dplyr::row_number()) +new_cohort_attrition <- cdm[[name]] %>% +CDMConnector::cohort_attrition() %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"), +by = "cohort_definition_id" +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +new_cohort_count <- cdm[[name]] %>% +CDMConnector::cohort_count() %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"), +by = "cohort_definition_id" +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +new_cohort <- cdm[[name]] %>% +inner_join( +new_cohort_set %>% dplyr::select("cohort_definition_id","cohort_definition_id_new"), +by = "cohort_definition_id", +copy = TRUE +) %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) %>% +CDMConnector::compute_query() %>% +CDMConnector::compute_query(name = name, temporary = FALSE, schema = attr(cdm, "write_schema"), overwrite = TRUE) +new_cohort_set <- new_cohort_set %>% +dplyr::select(-"cohort_definition_id") %>% +dplyr::rename("cohort_definition_id" = "cohort_definition_id_new") %>% +dplyr::relocate(cohort_definition_id) +cdm[[name]] <- CDMConnector::new_generated_cohort_set( +cohort_ref = new_cohort, +cohort_attrition_ref = new_cohort_attrition , +cohort_set_ref = new_cohort_set, +cohort_count_ref = new_cohort_count, +overwrite = TRUE) +cdm[[name]] +CDMConnector::cohort_set(cdm[[name]]) +devtools::load_all() +cdm$cohort1 %>% +generateMatchedCohortSet +cdm$cohort1 %>% +generateMatchedCohortSet(name = "new_matched_cohort", +targetCohortName = "cohort1", +targetCohortId = c(1,2), matchSex = TRUE, matchYearOfBirth = TRUE, -ratio = 4) -a <- generateMatchedCohortSet(cdm, -name = "new_cohort", -targetCohortName = "cases", -targetCohortId = NULL, +ratio = 2) +cdm$cohort1 +cdm$cohort1 %>% +generateMatchedCohortSet(name = "new_matched_cohort", +targetCohortName = "cohort1", +targetCohortId = c(1,2), matchSex = TRUE, matchYearOfBirth = TRUE, -ratio = 4) -a -a[["new_cohort"]] -a <- generateMatchedCohortSet(cdm, -name = "new_cohort", -targetCohortName = "cases", -targetCohortId = 1, +ratio = 2) +cdm$cohort1 %>% +generateMatchedCohortSet(name = "new_matched_cohort", +targetCohortName = "cohort_1", +targetCohortId = c(1,2), matchSex = TRUE, matchYearOfBirth = TRUE, -ratio = 4) -a -a[["new_cohort"]] -a <- generateMatchedCohortSet(cdm, -name = "new_cohort", -targetCohortName = "cases", -targetCohortId = 1, +ratio = 2) +cdm <- mockDrugUtilisation(numberIndividuals = 100) +cdm$cohort1 %>% +generateMatchedCohortSet(name = "new_matched_cohort", +targetCohortName = "cohort_1", +targetCohortId = c(1,2), matchSex = TRUE, matchYearOfBirth = TRUE, -ratio = 4) -a[["new_cohort"]] -a <- generateMatchedCohortSet(cdm, -name = "new_cohort", -targetCohortName = "cases", -targetCohortId = 2, +ratio = 2) +cdm +cdm$cohort1 %>% +generateMatchedCohortSet(name = "new_matched_cohort", +targetCohortName = "cohort1", +targetCohortId = c(1,2), matchSex = TRUE, matchYearOfBirth = TRUE, -ratio = 4) -a[["new_cohort"]] -name = "new_cohort" -targetCohortName = "cases" -targetCohortId = 2 +ratio = 2) +generateMatchedCohortSet(name = "new_matched_cohort", +targetCohortName = "cohort1", +targetCohortId = c(1,2), +matchSex = TRUE, +matchYearOfBirth = TRUE, +ratio = 2) +cdm %>% generateMatchedCohortSet(name = "new_matched_cohort", +targetCohortName = "cohort1", +targetCohortId = c(1,2), +matchSex = TRUE, +matchYearOfBirth = TRUE, +ratio = 2) +cdm +name <- "new_matched_cohort" +targetCohortName <- "cohort1" +targetCohortId = c(1,2) +targetCohortId <- c(1,2) matchSex = TRUE matchYearOfBirth = TRUE -ratio = 4 +ratio = 2 +cdm +cdm %>% generateMatchedCohortSet(name = name, targetCohortName = targetCohortName, targetCohortId = targetCohortId, matchSex = matchSex, matchYearOfBirth = matchYearOfBirth, ratio = ratio, ) +cdm %>% generateMatchedCohortSet(name = name, targetCohortName = targetCohortName, targetCohortId = targetCohortId, matchSex = matchSex, matchYearOfBirth = matchYearOfBirth, ratio = ratio) # validate initial input validateInput( cdm = cdm, name = name, targetCohortName = targetCohortName, @@ -49,258 +343,122 @@ matchYearOfBirth = matchYearOfBirth, ratio = ratio ) # get the number of cohorts n <- getNumberOfCohorts(cdm, targetCohortName) -n # get target cohort id targetCohortId <- getTargetCohortId(cdm, targetCohortId, targetCohortName) -targetCohortId # Create the cohort name with cases and controls of the targetCohortId cdm <- getNewCohort(cdm, name, targetCohortName, targetCohortId, n) +# Exclude cases from controls +cdm <- excludeCases(cdm, name, targetCohortId, n) +# get matched tables +matchCols <- getMatchCols(matchSex, matchYearOfBirth) +if(!is.null(matchCols)){ +# Exclude individuals without any match +cdm <- excludeNoMatchedIndividuals(cdm, name, matchCols, n) +# Match as ratio was infinite +cdm <- infiniteMatching(cdm, name, targetCohortId) +# Delete controls that are not in observation +cdm <- checkObservationPeriod(cdm, name, targetCohortId, n) +# Check ratio +cdm <- checkRatio(cdm, name, ratio, targetCohortId, n) +# Check cohort set ref +cdm <- checkCohortSetRef(cdm, name, targetCohortName, matchSex, matchYearOfBirth, targetCohortId, n) +# Rename cohort definition ids +cdm <- renameCohortDefinitionIds(cdm) +} +# Rename cohort definition ids +cdm <- renameCohortDefinitionIds(cdm, name) +devtools::load_all() +devtools::check() +library(CohortConstructor) +library(dplyr) +library(DrugUtilisation) +cdm <- mockDrugUtilisation(numberIndividual = 1000) +library(CohortConstructor) +library(dplyr) +library(DrugUtilisation) +cdm <- mockDrugUtilisation(numberIndividual = 1000) +cdm <- generateMatchedCohortSet(cdm = cdm, +name = "matched_cohort2", +targetCohortName = "cohort1", +targetCohortId = 1, +ratio = Inf) +CDMConnector::cohort_count(cdm$matched_cohort2) +library(CohortConstructor) +library(dplyr) +library(DrugUtilisation) +cdm <- mockDrugUtilisation(numberIndividual = 1000) +library(CohortConstructor) +library(dplyr) +library(DrugUtilisation) +cdm <- mockDrugUtilisation(numberIndividual = 1000) +CDMConnector::cohort_set(cdm$cohort1) +cdm <- generateMatchedCohortSet(cdm = cdm, +name = "matched_cohort1", +targetCohortName = "cohort1", +targetCohortId = 1) +CDMConnector::cohort_set(cdm$matched_cohort1) cdm -cdm[["new_cohort"]] -cdm[["new_cohort"]] %>% print(n = 10) -cdm[["new_cohort"]] %>% print(n = 15) -cdm[["cases"]] -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8507,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2012-01-04","2014-12-15","2004-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2012-01-05","2014-12-16","2004-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10), -) -) -cdm <- DrugUtilisation::generateConceptCohortSet( -cdm = cdmMock, -conceptSet = list(c1 = 317009, c2 = 4266367), -name = "cases", -end = "observation_period_end_date", -requiredObservation = c(0,0), -overwrite = TRUE -) -cdm[["cases"]] -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8532,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2012-01-04","2014-12-15","2004-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2012-01-05","2014-12-16","2004-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10), -) -) -cdmMock -cdmMock[["condition_occurrence"]] -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8532,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2012-01-04","2014-12-15","2004-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2012-01-05","2014-12-16","2004-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10), -) -) -cdm <- DrugUtilisation::generateConceptCohortSet( -cdm = cdmMock, -conceptSet = list(c1 = 317009, c2 = 4266367), -name = "cases", -end = "observation_period_end_date", -requiredObservation = c(0,0), -overwrite = TRUE -) -cdm[["cases"]] -cdm[["observation_period"]] -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8532,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10), -) -) +devtools::load_all() +devtools::load_all() +devtools::check() +library(dplyr) +devtools::load_all() +devtools::check() +devtools::load_all() +devtools::check() +# Create cdm object cdm <- DrugUtilisation::generateConceptCohortSet( -cdm = cdmMock, -conceptSet = list(c1 = 317009, c2 = 4266367), +cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200), +conceptSet = list(asthma = 317009), name = "cases", end = "observation_period_end_date", -requiredObservation = c(0,0), -overwrite = TRUE -) -cdm[["cases"]] +requiredObservation = c(180, 180), +overwrite = TRUE) generateMatchedCohortSet(cdm, name = "new_cohort", targetCohortName = "cases", -targetCohortId = 2, -matchSex = TRUE, -matchYearOfBirth = TRUE, -ratio = 4) -a <- generateMatchedCohortSet(cdm, +ratio = 2) +expect_no_error(a <- generateMatchedCohortSet(cdm, name = "new_cohort", targetCohortName = "cases", -targetCohortId = 2, -matchSex = TRUE, -matchYearOfBirth = TRUE, -ratio = 4) -cdm[["cases"]] -a[["new_cohort"]] -cdm[["observation_period"]] -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8532,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10)), -observation_period = tibble::tibble("observation_period_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"observation_period_start_date" = rep(as.Date(1950-01-01),10), -"observation_period_end_date" = rep(as.Date(2023-12-05),10), -"period_type_concept_id" = 44814724) -) +ratio = 2)) cdm <- DrugUtilisation::generateConceptCohortSet( -cdm = cdmMock, -conceptSet = list(c1 = 317009, c2 = 4266367), +cdm = DrugUtilisation::mockDrugUtilisation(numberIndividuals = 200), +conceptSet = list(asthma = 317009, other = 4141052, other1 = 432526), name = "cases", end = "observation_period_end_date", -requiredObservation = c(0,0), -overwrite = TRUE -) -a <- generateMatchedCohortSet(cdm, +requiredObservation = c(10,10), +overwrite = TRUE) +cdm +expect_no_error(generateMatchedCohortSet(cdm, +name = "new_cohort", +targetCohortName = "cases")) +expect_no_error(generateMatchedCohortSet(cdm, name = "new_cohort", targetCohortName = "cases", -targetCohortId = 2, -matchSex = TRUE, -matchYearOfBirth = TRUE, -ratio = 4) -a -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8532,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10)), -observation_period = tibble::tibble("observation_period_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"observation_period_start_date" = rep(as.Date(1950-01-01),10), -"observation_period_end_date" = rep(as.Date(2023-12-05),10), -"period_type_concept_id" = 44814724) -) -cdm <- DrugUtilisation::generateConceptCohortSet( -cdm = cdmMock, -conceptSet = list(c1 = 317009, c2 = 4266367), -name = "cases", -end = "observation_period_end_date", -requiredObservation = c(0,0), -overwrite = TRUE -) -a <- generateMatchedCohortSet(cdm, +ratio = 3)) +expect_no_error(generateMatchedCohortSet(cdm, name = "new_cohort", targetCohortName = "cases", -targetCohortId = 2, -matchSex = TRUE, +ratio = Inf)) +expect_no_error(generateMatchedCohortSet(cdm, +name = "new_cohort", +matchSex = FALSE, matchYearOfBirth = TRUE, -ratio = 4) -cdm[["cases"]] -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8532,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10)), -observation_period = tibble::tibble("observation_period_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"observation_period_start_date" = rep(as.Date(1950-01-01),10), -"observation_period_end_date" = rep(as.Date(2021-12-05),10), -"period_type_concept_id" = 44814724) -) -cdm <- DrugUtilisation::generateConceptCohortSet( -cdm = cdmMock, -conceptSet = list(c1 = 317009, c2 = 4266367), -name = "cases", -end = "observation_period_end_date", -requiredObservation = c(0,0), -overwrite = TRUE -) -cdm[["cases"]] -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8532,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10)), -observation_period = tibble::tibble("observation_period_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"observation_period_start_date" = rep(as.Date(1984-01-01),10), -"observation_period_end_date" = rep(as.Date(2021-12-05),10), -"period_type_concept_id" = 44814724) -) -rep(as.Date(1984-01-01),10) +targetCohortName = "cases")) +expect_no_error(generateMatchedCohortSet(cdm, +name = "new_cohort", +matchSex = TRUE, +matchYearOfBirth = FALSE, +targetCohortName = "cases")) +expect_no_error(b <- generateMatchedCohortSet(cdm, +name = "new_cohort", +matchSex = FALSE, +matchYearOfBirth = FALSE, +targetCohortName = "cases")) +help(arrange) +devtools::load_all() +devtools::check() # Generate mock data cdmMock <- DrugUtilisation::mockDrugUtilisation( numberIndividuals = 10, @@ -308,7 +466,7 @@ person = tibble::tibble("person_id" = seq(1,10,1), "gender_concept_id" = rep(8532,10), "year_of_birth" = rep(1980, 10), "day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), +"birth_date_time" = as.Date(rep("1980-04-01",10)), "month_of_birth" = rep(4, 10)), condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), "person_id" = seq(1,10,1), @@ -330,15 +488,6 @@ end = "observation_period_end_date", requiredObservation = c(0,0), overwrite = TRUE ) -cdm[["cases"]] -a <- generateMatchedCohortSet(cdm, -name = "new_cohort", -targetCohortName = "cases", -targetCohortId = 2, -matchSex = TRUE, -matchYearOfBirth = TRUE, -ratio = 4) -a[["new_cohort"]] a <- generateMatchedCohortSet(cdm, name = "new_cohort", targetCohortName = "cases", @@ -346,167 +495,18 @@ targetCohortId = NULL, matchSex = TRUE, matchYearOfBirth = TRUE, ratio = 4) -a[["cases"]] -a[["new_cohort"]] -a[["new_cohort"]] %>% print(n = 20) -a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(1,3)) %>% -dplyr::summarise(subject_unique) -a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(1,3)) %>% -dplyr::summarise(subject_id) -a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(1,3)) %>% -dplyr::summarise(subject_id) %>% unique() +a$new_cohort a[["new_cohort"]] %>% dplyr::filter(cohort_definition_id %in% c(1,3)) %>% dplyr::summarise(subject_id) %>% -distinct() -a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(1,3)) %>% -dplyr::summarise(subject_id) %>% -dplyr::distinct() -a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(1,3)) %>% -dplyr::summarise(subject_id) %>% -dplyr::distinct() %>% dplyr::pull() -a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(1,3)) %>% -dplyr::summarise(subject_id) %>% -dplyr::distinct() %>% dplyr::pull() %>% length() == 10 -expect_true(a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(2,4)) %>% -dplyr::summarise(subject_id) %>% dplyr::distinct() %>% dplyr::pull() %>% length() == 10) +a[["new_cohort"]] a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(2,4)) %>% -dplyr::summarise(subject_id) %>% -dplyr::distinct() %>% dplyr::pull() %>% length() -a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(2,4)) +dplyr::filter(cohort_definition_id %in% c(1,3)) a[["new_cohort"]] %>% -dplyr::filter(cohort_definition_id %in% c(2,4)) %>% -dplyr::summarise(cohort_start_date) %>% -dplyr::distinct() %>% dplyr::pull() %>% length() == 2 -a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) -a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::arrange() %>% dplyr::pull() -a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::arrange(desc()) %>% dplyr::pull() -a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) -a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() -a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::arrange() -as.Date(c("2017-01-01","2019-01-01","2015-01-01")) -arrange(as.Date(c("2017-01-01","2019-01-01","2015-01-01"))) -dplyr::arrange(as.Date(c("2017-01-01","2019-01-01","2015-01-01"))) -order(as.Date(c("2017-01-01","2019-01-01","2015-01-01"))) -as.Date(c("2017-01-01","2019-01-01","2015-01-01")) -as.Date(c("2017-01-01","2019-01-01","2015-01-01")) %>% order() -as.Date(c("2017-01-01","2019-01-01","2015-01-01")) %>% mdy -as.Date(c("2017-01-01","2019-01-01","2015-01-01")) %>% lubridate::mdy() -a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() -a[["new_cohort"]] %>% dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09") -expect_true(a[["new_cohort"]] %>% -dplyr::filter(subject_id %in% c(seq(5,10,1)) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")) -}) -expect_true(a[["new_cohort"]] %>% -dplyr::filter(subject_id %in% c(seq(5,10,1))) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")) -expect_true(a[["new_cohort"]] %>% -dplyr::filter(subject_id %in% seq(5,10,1)) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")) -expect_true(a[["new_cohort"]] %>% -dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09")) -outc %>% dplyr::distinct() -outc <- a[["new_cohort"]] %>% -dplyr::filter(subject_id == 5) %>% dplyr::summarise(cohort_start_date) %>% dplyr::pull() %in% c("2017-10-30","2003-01-04","2014-12-15","2010-09-09") -outc %>% dplyr::distinct() -outc == c(TRUE,TRUE) -dplyr::distinct(outc) -unique(outc) -expect_true(unique(outc) == TRUE) -devtools::load_all(".") -devtools::check() -usethis::use_package(tibble,"Suggests") -usethis::use_package(tibble,"Suggests") -usethis::use_package("tibble","Suggests") -devtools::check() -devtools::load_all(".") -devtools::check() -devtools::load_all(".") +dplyr::filter(cohort_definition_id %in% c(1,3)) %>% +dplyr::summarise(subject_id) +devtools::load_all() devtools::check() -install.packages("DrugUtilisation") -install.packages("DrugUtilisation") -devtools::load_all(".") +devtools::load_all() devtools::check() -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8532,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date(1980,04,01),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10)), -observation_period = tibble::tibble("observation_period_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"observation_period_start_date" = as.Date(rep("1984-01-01",10)), -"observation_period_end_date" = as.Date(rep("2021-01-01",10)), -"period_type_concept_id" = 44814724) -) -help(as.Date) -help(rep) -help(seq) -cdm <- DrugUtilisation::generateConceptCohortSet( -cdm = cdmMock, -conceptSet = list(c1 = 317009, c2 = 4266367), -name = "cases", -end = "observation_period_end_date", -requiredObservation = c(0,0), -overwrite = TRUE -) -generateMatchedCohortSet(cdm, -name = "new_cohort", -targetCohortName = "cases", -targetCohortId = NULL, -matchSex = TRUE, -matchYearOfBirth = TRUE, -ratio = 4) -a <- generateMatchedCohortSet(cdm, -name = "new_cohort", -targetCohortName = "cases", -targetCohortId = NULL, -matchSex = TRUE, -matchYearOfBirth = TRUE, -ratio = 4) -rep(as.Date(1980,04,01),10) -# Generate mock data -cdmMock <- DrugUtilisation::mockDrugUtilisation( -numberIndividuals = 10, -person = tibble::tibble("person_id" = seq(1,10,1), -"gender_concept_id" = rep(8532,10), -"year_of_birth" = rep(1980, 10), -"day_of_birth" = rep(1, 10), -"birth_date_time" = rep(as.Date("1980,04,01"),10), -"month_of_birth" = rep(4, 10)), -condition_occurrence = tibble::tibble("condition_ocurrence_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"condition_concept_id" = c(317009,317009,4266367,4266367,rep(1,6)), -"condition_start_date" = as.Date(c("2017-10-30","2003-01-04","2014-12-15","2010-09-09","2004-08-26","1985-03-31","1985-03-13","1985-07-11","1983-11-07","2020-01-13")), -"condition_end_date" = as.Date(c("2017-11-01","2003-01-05","2014-12-16","2010-09-10","2004-08-27","1985-04-01","1985-03-14","1985-07-12","1983-11-08","2020-01-14")), -"condition_type_concept_id" = rep(32020,10)), -observation_period = tibble::tibble("observation_period_id" = seq(1,10,1), -"person_id" = seq(1,10,1), -"observation_period_start_date" = as.Date(rep("1984-01-01",10)), -"observation_period_end_date" = as.Date(rep("2021-01-01",10)), -"period_type_concept_id" = 44814724) -) -as.Date(rep("1980,04,01",10)) -as.Date(rep("1980,04,01",10)) -as.Date(rep("1980-04-01",10)) -devtools::load_all(".") -devtools::check() -library(CohortConstructor) -library(CohortConstructor) diff --git a/R/generateMatchedCohortSet.R b/R/generateMatchedCohortSet.R index ad7d768a..32adde19 100644 --- a/R/generateMatchedCohortSet.R +++ b/R/generateMatchedCohortSet.R @@ -192,7 +192,7 @@ getNewCohort <- function(cdm, name, targetCohortName, targetCohortId, n){ temporary = FALSE, name = name, overwrite = TRUE), - cohort_attrition_ref = cdm[[targetCohortName]] %>% CDMConnector::cohort_attrition(), + cohort_attrition_ref = cdm[[targetCohortName]] %>% CDMConnector::cohort_attrition() %>% dplyr::as_tibble(), cohort_set_ref = cdm[[targetCohortName]] %>% CDMConnector::cohort_set(), overwrite = TRUE) }else{ @@ -236,7 +236,7 @@ getNewCohort <- function(cdm, name, targetCohortName, targetCohortId, n){ cdm[[name]] <- CDMConnector::new_generated_cohort_set( cohort_ref = all, - cohort_attrition_ref = cohort_attrition, + cohort_attrition_ref = cohort_attrition %>% dplyr::as_tibble(), cohort_set_ref = cohort_set_ref, overwrite = TRUE) } @@ -450,7 +450,7 @@ checkCohortSetRef <- function(cdm, name, targetCohortName, matchSex, matchYearOf cdm[[name]] <- CDMConnector::new_generated_cohort_set( cohort_ref = cdm[[name]], - cohort_attrition_ref = cdm[[name]] %>% CDMConnector::cohort_attrition(), + cohort_attrition_ref = cdm[[name]] %>% CDMConnector::cohort_attrition() %>% dplyr::as_tibble(), cohort_set_ref = cohort_set_ref, overwrite = TRUE) @@ -503,7 +503,7 @@ renameCohortDefinitionIds <- function(cdm, name){ cdm[[name]] <- CDMConnector::new_generated_cohort_set( cohort_ref = new_cohort, - cohort_attrition_ref = new_cohort_attrition , + cohort_attrition_ref = new_cohort_attrition %>% dplyr::as_tibble(), cohort_set_ref = new_cohort_set, cohort_count_ref = new_cohort_count, overwrite = TRUE)