diff --git a/DESCRIPTION b/DESCRIPTION index 7790036..9ac4a24 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: Characterization Type: Package Title: Characterizations of Cohorts -Version: 2.0.0 -Date: 2024-08-07 +Version: 2.0.1 +Date: 2024-08-21 Authors@R: c( person("Jenna", "Reps", , "reps@ohdsi.org", role = c("aut", "cre")), person("Patrick", "Ryan", , "ryan@ohdsi.org", role = c("aut")), diff --git a/NEWS.md b/NEWS.md index 076b428..1f3e7d0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +Characterization 2.0.1 +====================== +- widened cohort_type in the results tables from varchar(10) to varchar(12) +- fixed setting_id being altered when results csv files were loaded with readr (column types are now supplied to read_csv and setting_id is stored as varchar(30)) + Characterization 2.0.0 ====================== - added tests for all HADES supported dbms diff --git a/R/RunCharacterization.R b/R/RunCharacterization.R index 4d8a0fa..7951510 100644 --- a/R/RunCharacterization.R +++ b/R/RunCharacterization.R @@ -406,6 +406,14 @@ aggregateCsvs <- function( "rechallenge_fail_case_series.csv", "dechallenge_rechallenge.csv" ) + colTypes <- c( + 'ciicc','ciiiicciiccc', 'didciiccd', + 'didddddddddciicc', 'dciicicc', + 'icciicccc', 'iiciicciicddddd', + '????????', + '?????????????????', '????????????????????' 
+ ) + # this makes sure results are recreated firstTracker <- data.frame( table = tables, @@ -430,7 +438,8 @@ aggregateCsvs <- function( # TODO do this in batches data <- readr::read_csv( file = loadPath, - show_col_types = F + show_col_types = F, + col_types = colTypes[csvType == tables] ) if (csvType == "analysis_ref.csv") { diff --git a/inst/settings/resultsDataModelSpecification.csv b/inst/settings/resultsDataModelSpecification.csv index 907c67d..3dc0e59 100644 --- a/inst/settings/resultsDataModelSpecification.csv +++ b/inst/settings/resultsDataModelSpecification.csv @@ -45,7 +45,7 @@ dechallenge_rechallenge,pct_rechallenge_attempt,float,Yes,No,No,No,The percentag dechallenge_rechallenge,pct_rechallenge_success,float,Yes,No,No,No,The percentage of rechallenge success dechallenge_rechallenge,pct_rechallenge_fail,float,Yes,No,No,No,The percentage of rechallenge fails analysis_ref,database_id,varchar(100),Yes,Yes,No,No,The database identifier -analysis_ref,setting_id,float,Yes,Yes,No,No,The run identifier +analysis_ref,setting_id,varchar(30),Yes,Yes,No,No,The run identifier analysis_ref,analysis_id,int,Yes,Yes,No,No,The analysis identifier analysis_ref,analysis_name,varchar,Yes,No,No,No,The analysis name analysis_ref,domain_id,varchar,Yes,No,No,No,The domain id @@ -54,7 +54,7 @@ analysis_ref,end_day,int,Yes,No,No,No,The end day analysis_ref,is_binary,varchar(1),Yes,No,No,No,Is this a binary analysis analysis_ref,missing_means_zero,varchar(1),Yes,No,No,No,Missing means zero covariate_ref,database_id,varchar(100),Yes,Yes,No,No,The database identifier -covariate_ref,setting_id,float,Yes,Yes,No,No,The run identifier +covariate_ref,setting_id,varchar(30),Yes,Yes,No,No,The run identifier covariate_ref,covariate_id,bigint,Yes,Yes,No,No,The covariate identifier covariate_ref,covariate_name,varchar,Yes,No,No,No,The covariate name covariate_ref,analysis_id,int,Yes,No,No,No,The analysis identifier @@ -62,8 +62,8 @@ covariate_ref,concept_id,bigint,Yes,No,No,No,The concept 
identifier covariate_ref,value_as_concept_id,int,N,N,N,N,The value as concept_id for features created from observation or measurement values covariate_ref,collisions,int,N,N,N,N,The number of collisions found for the covariate_id covariates,database_id,varchar(100),Yes,Yes,No,No,The database identifier -covariates,setting_id,float,Yes,Yes,No,No,The run identifier -covariates,cohort_type,varchar(10),Yes,Yes,No,No,The cohort type +covariates,setting_id,varchar(30),Yes,Yes,No,No,The run identifier +covariates,cohort_type,varchar(12),Yes,Yes,No,No,The cohort type covariates,target_cohort_id,int,Yes,Yes,No,No,The target cohort id covariates,outcome_cohort_id,int,Yes,Yes,No,No,The outcome cohort id covariates,min_characterization_mean,float,No,Yes,No,No,Minimum fraction for feature extraction @@ -71,8 +71,8 @@ covariates,covariate_id,bigint,Yes,Yes,No,No,The covaraite id covariates,sum_value,int,Yes,No,No,No,The sum value covariates,average_value,float,No,No,No,No,The average value covariates_continuous,database_id,varchar(100),Yes,Yes,No,No,The database identifier -covariates_continuous,setting_id,float,Yes,Yes,No,No,The run identifier -covariates_continuous,cohort_type,varchar(10),Yes,Yes,No,No,The cohort type +covariates_continuous,setting_id,varchar(30),Yes,Yes,No,No,The run identifier +covariates_continuous,cohort_type,varchar(12),Yes,Yes,No,No,The cohort type covariates_continuous,target_cohort_id,int,Yes,Yes,No,No,The target cohort id covariates_continuous,outcome_cohort_id,int,Yes,Yes,No,No,The outcome cohort id covariates_continuous,covariate_id,bigint,Yes,Yes,No,No,The covariate identifier @@ -86,7 +86,7 @@ covariates_continuous,p_10_value,float,No,No,No,No,The 10th percentile covariates_continuous,p_25_value,float,No,No,No,No,The 25th percentile covariates_continuous,p_75_value,float,No,No,No,No,The 75th percentile covariates_continuous,p_90_value,float,No,No,No,No,The 90th percentile -settings,setting_id,float,Yes,Yes,No,No,The run identifier 
+settings,setting_id,varchar(30),Yes,Yes,No,No,The run identifier settings,database_id,varchar(100),Yes,Yes,No,No,The database identifier settings,covariate_setting_json,varchar,Yes,No,No,No,The covariate settings JSON settings,case_covariate_setting_json,varchar,Yes,No,No,No,The during covariate settings JSON @@ -99,12 +99,12 @@ settings,end_anchor,varchar(15),No,No,No,No,The end anchor settings,case_pre_target_duration,int,No,No,No,No,How far to look back before index for case features settings,case_post_outcome_duration,int,No,No,No,No,How far to look forward after outcome for case features cohort_details,database_id,varchar(100),Yes,Yes,No,No,The database identifier -cohort_details,setting_id,float,Yes,Yes,No,No,The run identifier -cohort_details,cohort_type,varchar(10),Yes,Yes,No,No,The cohort type +cohort_details,setting_id,varchar(30),Yes,Yes,No,No,The run identifier +cohort_details,cohort_type,varchar(12),Yes,Yes,No,No,The cohort type cohort_details,target_cohort_id,int,Yes,Yes,No,No,The target cohort id cohort_details,outcome_cohort_id,int,Yes,Yes,No,No,The outcome cohort id cohort_counts,database_id,varchar(100),Yes,No,No,No,The database identifier -cohort_counts,cohort_type,varchar(10),Yes,No,No,No,The cohort type +cohort_counts,cohort_type,varchar(12),Yes,No,No,No,The cohort type cohort_counts,target_cohort_id,int,Yes,No,No,No,The target cohort id cohort_counts,outcome_cohort_id,int,Yes,No,No,No,The outcome cohort id cohort_counts,risk_window_start,int,No,No,No,No,The risk window start diff --git a/inst/sql/sql_server/ResultTables.sql b/inst/sql/sql_server/ResultTables.sql index d8209be..edb85e2 100644 --- a/inst/sql/sql_server/ResultTables.sql +++ b/inst/sql/sql_server/ResultTables.sql @@ -61,7 +61,7 @@ CREATE TABLE @my_schema.@table_prefixdechallenge_rechallenge ( CREATE TABLE @my_schema.@table_prefixanalysis_ref ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, + setting_id varchar(30) NOT NULL, analysis_id int NOT NULL, analysis_name 
varchar(max) NOT NULL, domain_id varchar(30), @@ -74,7 +74,7 @@ CREATE TABLE @my_schema.@table_prefixanalysis_ref ( CREATE TABLE @my_schema.@table_prefixcovariate_ref ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, + setting_id varchar(30) NOT NULL, covariate_id bigint NOT NULL, covariate_name varchar(max) NOT NULL, analysis_id int NOT NULL, @@ -86,8 +86,8 @@ CREATE TABLE @my_schema.@table_prefixcovariate_ref ( CREATE TABLE @my_schema.@table_prefixcovariates ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, - cohort_type varchar(10), + setting_id varchar(30) NOT NULL, + cohort_type varchar(12), target_cohort_id int, outcome_cohort_id int, min_characterization_mean float, @@ -99,8 +99,8 @@ CREATE TABLE @my_schema.@table_prefixcovariates ( CREATE TABLE @my_schema.@table_prefixcovariates_continuous ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, - cohort_type varchar(10), + setting_id varchar(30) NOT NULL, + cohort_type varchar(12), target_cohort_id int, outcome_cohort_id int, covariate_id bigint NOT NULL, @@ -119,7 +119,7 @@ CREATE TABLE @my_schema.@table_prefixcovariates_continuous ( -- covariateSettings CREATE TABLE @my_schema.@table_prefixsettings ( - setting_id float NOT NULL, + setting_id varchar(30) NOT NULL, database_id varchar(100) NOT NULL, covariate_setting_json varchar(MAX), case_covariate_setting_json varchar(MAX), @@ -137,8 +137,8 @@ CREATE TABLE @my_schema.@table_prefixsettings ( -- added this table CREATE TABLE @my_schema.@table_prefixcohort_details ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, - cohort_type varchar(10), + setting_id varchar(30) NOT NULL, + cohort_type varchar(12), target_cohort_id int, outcome_cohort_id int, PRIMARY KEY (setting_id, database_id,target_cohort_id,outcome_cohort_id,cohort_type) @@ -146,7 +146,7 @@ CREATE TABLE @my_schema.@table_prefixcohort_details ( CREATE TABLE @my_schema.@table_prefixcohort_counts( database_id varchar(100) NOT NULL, - cohort_type 
varchar(10), + cohort_type varchar(12), target_cohort_id int, outcome_cohort_id int, risk_window_start int,