From 67c253d37684029a1615bb329838148fdb9ee3f6 Mon Sep 17 00:00:00 2001 From: jreps Date: Wed, 21 Aug 2024 13:44:21 -0400 Subject: [PATCH] settings id fix - updating the settings id to a varchar in results - manually specifying csv type so readr does not convert and mess up settings id --- NEWS.md | 1 + R/RunCharacterization.R | 11 ++++++++++- inst/settings/resultsDataModelSpecification.csv | 12 ++++++------ inst/sql/sql_server/ResultTables.sql | 12 ++++++------ 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7d78590..1f3e7d0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ Characterization 2.0.1 ====================== - edited cohort_type in results to varchar(12) +- fixed setting id being messed up by readr loading Characterization 2.0.0 ====================== diff --git a/R/RunCharacterization.R b/R/RunCharacterization.R index 4d8a0fa..7951510 100644 --- a/R/RunCharacterization.R +++ b/R/RunCharacterization.R @@ -406,6 +406,14 @@ aggregateCsvs <- function( "rechallenge_fail_case_series.csv", "dechallenge_rechallenge.csv" ) + colTypes <- c( + 'ciicc','ciiiicciiccc', 'didciiccd', + 'didddddddddciicc', 'dciicicc', + 'icciicccc', 'iiciicciicddddd', + '????????', + '?????????????????', '????????????????????' + ) + # this makes sure results are recreated firstTracker <- data.frame( table = tables, @@ -430,7 +438,8 @@ aggregateCsvs <- function( # TODO do this in batches data <- readr::read_csv( file = loadPath, - show_col_types = F + show_col_types = F, + col_types = colTypes[csvType == tables] ) if (csvType == "analysis_ref.csv") { diff --git a/inst/settings/resultsDataModelSpecification.csv b/inst/settings/resultsDataModelSpecification.csv index 457830f..3dc0e59 100644 --- a/inst/settings/resultsDataModelSpecification.csv +++ b/inst/settings/resultsDataModelSpecification.csv @@ -45,7 +45,7 @@ dechallenge_rechallenge,pct_rechallenge_attempt,float,Yes,No,No,No,The percentag dechallenge_rechallenge,pct_rechallenge_success,float,Yes,No,No,No,The percentage of rechallenge success dechallenge_rechallenge,pct_rechallenge_fail,float,Yes,No,No,No,The percentage of rechallenge fails analysis_ref,database_id,varchar(100),Yes,Yes,No,No,The database identifier -analysis_ref,setting_id,float,Yes,Yes,No,No,The run identifier +analysis_ref,setting_id,varchar(30),Yes,Yes,No,No,The run identifier analysis_ref,analysis_id,int,Yes,Yes,No,No,The analysis identifier analysis_ref,analysis_name,varchar,Yes,No,No,No,The analysis name analysis_ref,domain_id,varchar,Yes,No,No,No,The domain id @@ -54,7 +54,7 @@ analysis_ref,end_day,int,Yes,No,No,No,The end day analysis_ref,is_binary,varchar(1),Yes,No,No,No,Is this a binary analysis analysis_ref,missing_means_zero,varchar(1),Yes,No,No,No,Missing means zero covariate_ref,database_id,varchar(100),Yes,Yes,No,No,The database identifier -covariate_ref,setting_id,float,Yes,Yes,No,No,The run identifier +covariate_ref,setting_id,varchar(30),Yes,Yes,No,No,The run identifier covariate_ref,covariate_id,bigint,Yes,Yes,No,No,The covariate identifier covariate_ref,covariate_name,varchar,Yes,No,No,No,The covariate name covariate_ref,analysis_id,int,Yes,No,No,No,The analysis identifier @@ -62,7 +62,7 @@ covariate_ref,concept_id,bigint,Yes,No,No,No,The concept identifier covariate_ref,value_as_concept_id,int,N,N,N,N,The value as concept_id for features created from observation or measurement values covariate_ref,collisions,int,N,N,N,N,The number of collisions found for the covariate_id covariates,database_id,varchar(100),Yes,Yes,No,No,The database identifier -covariates,setting_id,float,Yes,Yes,No,No,The run identifier +covariates,setting_id,varchar(30),Yes,Yes,No,No,The run identifier covariates,cohort_type,varchar(12),Yes,Yes,No,No,The cohort type covariates,target_cohort_id,int,Yes,Yes,No,No,The target cohort id covariates,outcome_cohort_id,int,Yes,Yes,No,No,The outcome cohort id @@ -71,7 +71,7 @@ covariates,covariate_id,bigint,Yes,Yes,No,No,The covaraite id covariates,sum_value,int,Yes,No,No,No,The sum value covariates,average_value,float,No,No,No,No,The average value covariates_continuous,database_id,varchar(100),Yes,Yes,No,No,The database identifier -covariates_continuous,setting_id,float,Yes,Yes,No,No,The run identifier +covariates_continuous,setting_id,varchar(30),Yes,Yes,No,No,The run identifier covariates_continuous,cohort_type,varchar(12),Yes,Yes,No,No,The cohort type covariates_continuous,target_cohort_id,int,Yes,Yes,No,No,The target cohort id covariates_continuous,outcome_cohort_id,int,Yes,Yes,No,No,The outcome cohort id @@ -86,7 +86,7 @@ covariates_continuous,p_10_value,float,No,No,No,No,The 10th percentile covariates_continuous,p_25_value,float,No,No,No,No,The 25th percentile covariates_continuous,p_75_value,float,No,No,No,No,The 75th percentile covariates_continuous,p_90_value,float,No,No,No,No,The 90th percentile -settings,setting_id,float,Yes,Yes,No,No,The run identifier +settings,setting_id,varchar(30),Yes,Yes,No,No,The run identifier settings,database_id,varchar(100),Yes,Yes,No,No,The database identifier settings,covariate_setting_json,varchar,Yes,No,No,No,The covariate settings JSON settings,case_covariate_setting_json,varchar,Yes,No,No,No,The during covariate settings JSON @@ -99,7 +99,7 @@ settings,end_anchor,varchar(15),No,No,No,No,The end anchor settings,case_pre_target_duration,int,No,No,No,No,How far to look back before index for case features settings,case_post_outcome_duration,int,No,No,No,No,How far to look forward after outcome for case features cohort_details,database_id,varchar(100),Yes,Yes,No,No,The database identifier -cohort_details,setting_id,float,Yes,Yes,No,No,The run identifier +cohort_details,setting_id,varchar(30),Yes,Yes,No,No,The run identifier cohort_details,cohort_type,varchar(12),Yes,Yes,No,No,The cohort type cohort_details,target_cohort_id,int,Yes,Yes,No,No,The target cohort id cohort_details,outcome_cohort_id,int,Yes,Yes,No,No,The outcome cohort id diff --git a/inst/sql/sql_server/ResultTables.sql b/inst/sql/sql_server/ResultTables.sql index 445ddb8..edb85e2 100644 --- a/inst/sql/sql_server/ResultTables.sql +++ b/inst/sql/sql_server/ResultTables.sql @@ -61,7 +61,7 @@ CREATE TABLE @my_schema.@table_prefixdechallenge_rechallenge ( CREATE TABLE @my_schema.@table_prefixanalysis_ref ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, + setting_id varchar(30) NOT NULL, analysis_id int NOT NULL, analysis_name varchar(max) NOT NULL, domain_id varchar(30), @@ -74,7 +74,7 @@ CREATE TABLE @my_schema.@table_prefixanalysis_ref ( CREATE TABLE @my_schema.@table_prefixcovariate_ref ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, + setting_id varchar(30) NOT NULL, covariate_id bigint NOT NULL, covariate_name varchar(max) NOT NULL, analysis_id int NOT NULL, @@ -86,7 +86,7 @@ CREATE TABLE @my_schema.@table_prefixcovariate_ref ( CREATE TABLE @my_schema.@table_prefixcovariates ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, + setting_id varchar(30) NOT NULL, cohort_type varchar(12), target_cohort_id int, outcome_cohort_id int, @@ -99,7 +99,7 @@ CREATE TABLE @my_schema.@table_prefixcovariates ( CREATE TABLE @my_schema.@table_prefixcovariates_continuous ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, + setting_id varchar(30) NOT NULL, cohort_type varchar(12), target_cohort_id int, outcome_cohort_id int, @@ -119,7 +119,7 @@ CREATE TABLE @my_schema.@table_prefixcovariates_continuous ( -- covariateSettings CREATE TABLE @my_schema.@table_prefixsettings ( - setting_id float NOT NULL, + setting_id varchar(30) NOT NULL, database_id varchar(100) NOT NULL, covariate_setting_json varchar(MAX), case_covariate_setting_json varchar(MAX), @@ -137,7 +137,7 @@ CREATE TABLE @my_schema.@table_prefixsettings ( -- added this table CREATE TABLE @my_schema.@table_prefixcohort_details ( database_id varchar(100) NOT NULL, - setting_id float NOT NULL, + setting_id varchar(30) NOT NULL, cohort_type varchar(12), target_cohort_id int, outcome_cohort_id int,