From f27e93373f899be09f0fc32e2785f97470fed1a3 Mon Sep 17 00:00:00 2001 From: Klochkova Date: Fri, 14 Jun 2019 13:25:35 +0200 Subject: [PATCH 01/15] added hive support --- pom.xml | 40 +++++++++++++++++++ src/main/extras/bigquery/README.md | 2 +- src/main/extras/hive/README.md | 6 +++ src/main/extras/impala/README.md | 4 +- .../org/ohdsi/webapi/DataAccessConfig.java | 10 ++++- .../java/org/ohdsi/webapi/source/Source.java | 10 +++-- .../webapi/util/DataSourceDTOParserTest.java | 14 +++++++ 7 files changed, 78 insertions(+), 8 deletions(-) create mode 100644 src/main/extras/hive/README.md diff --git a/pom.xml b/pom.xml index 6b5b5c0be1..87bfe3d4bd 100644 --- a/pom.xml +++ b/pom.xml @@ -1166,5 +1166,45 @@ + + webapi-hive + + true + + ${basedir}/src/main/extras/hive + + + + org.apache.hive + hive-jdbc + 3.1.0.3.0.1.0-187 + + + + + + org.apache.maven.plugins + maven-install-plugin + 2.5.2 + + + hive-jdbc + initialize + + install-file + + + org.apache.hive + hive-jdbc + 3.1.0.3.0.1.0-187 + jar + ${hive.classpath}/hive-jdbc-standalone.jar + + + + + + + diff --git a/src/main/extras/bigquery/README.md b/src/main/extras/bigquery/README.md index 1ba409964c..1b52d3fe98 100644 --- a/src/main/extras/bigquery/README.md +++ b/src/main/extras/bigquery/README.md @@ -1,6 +1,6 @@ To build WebAPI with Google BigQuery support do the following: 1. Go to https://cloud.google.com/bigquery/partners/simba-drivers/ and 2. Download the JDBC 4.2 driver -3. Unpack archive and and set the bigquery.classpath property in your settings.xml to the unpacked archive location (ie: C://downloads/bigquery) inside the webapi-bigquery profile. +3. Unpack archive and set the bigquery.classpath property in your settings.xml to the unpacked archive location (ie: C://downloads/bigquery) inside the webapi-bigquery profile. 4. Build WebAPI with webapi-bigquery profile. 
* mvn -Pwebapi-postgresql,webapi-bigquery clean package diff --git a/src/main/extras/hive/README.md b/src/main/extras/hive/README.md new file mode 100644 index 0000000000..864f491d48 --- /dev/null +++ b/src/main/extras/hive/README.md @@ -0,0 +1,6 @@ +To build WebAPI with Apache Hive LLAP support do the following: +1. Add a line to your hosts file that maps the IP address of your Hive server to "sandbox-hdp.hortonworks.com" +2. Download the JDBC driver from http://sandbox-hdp.hortonworks.com:10002/jdbcjar +3. Set the hive.classpath property in your settings.xml to the directory where the driver is saved as hive-jdbc-standalone.jar (ie: C://downloads/hive) inside the webapi-hive profile +4. Build WebAPI with webapi-hive profile: + * mvn -Pwebapi-hive clean package \ No newline at end of file diff --git a/src/main/extras/impala/README.md b/src/main/extras/impala/README.md index dc2dacd636..3270a144d0 100644 --- a/src/main/extras/impala/README.md +++ b/src/main/extras/impala/README.md @@ -1,7 +1,7 @@ To build WebAPI with Impala support do the following: 1. Go to the https://www.cloudera.com/downloads/connectors/impala/jdbc/2-5-43.html -2. Register to clouder if you did not registered earlier or sign in to your Cloudera account +2. Register with Cloudera if you have not registered earlier, or sign in to your Cloudera account 3. Download the latest Impala JDBC drivers -4. Unpack archive and and set the impala.classpath property in your settings.xml to the unpacked archive location (ie: C://downloads/impalaJDBC) inside the webapi-impala profile. +4. Unpack archive and set the impala.classpath property in your settings.xml to the unpacked archive location (ie: C://downloads/impalaJDBC) inside the webapi-impala profile. 5. Build WebAPI with webapi-impala profile. 
* mvn -Pwebapi-postgresql,webapi-impala clean package diff --git a/src/main/java/org/ohdsi/webapi/DataAccessConfig.java b/src/main/java/org/ohdsi/webapi/DataAccessConfig.java index 91ee21efa3..5e8a2893e4 100644 --- a/src/main/java/org/ohdsi/webapi/DataAccessConfig.java +++ b/src/main/java/org/ohdsi/webapi/DataAccessConfig.java @@ -77,7 +77,15 @@ public DataSource primaryDataSource() { //note autocommit defaults vary across vendors. use provided @Autowired TransactionTemplate String[] supportedDrivers; - supportedDrivers = new String[]{"org.postgresql.Driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver", "oracle.jdbc.driver.OracleDriver", "com.amazon.redshift.jdbc.Driver", "com.cloudera.impala.jdbc41.Driver", "net.starschema.clouddb.jdbc.BQDriver", "org.netezza.Driver", "com.simba.googlebigquery.jdbc42.Driver"}; + supportedDrivers = new String[]{"org.postgresql.Driver", + "com.microsoft.sqlserver.jdbc.SQLServerDriver", + "oracle.jdbc.driver.OracleDriver", + "com.amazon.redshift.jdbc.Driver", + "com.cloudera.impala.jdbc41.Driver", + "net.starschema.clouddb.jdbc.BQDriver", + "org.netezza.Driver", + "com.simba.googlebigquery.jdbc42.Driver", + "org.apache.hive.jdbc.HiveDriver"}; for (String driverName : supportedDrivers) { try { Class.forName(driverName); diff --git a/src/main/java/org/ohdsi/webapi/source/Source.java b/src/main/java/org/ohdsi/webapi/source/Source.java index 6402bd8371..d5e77a47f2 100644 --- a/src/main/java/org/ohdsi/webapi/source/Source.java +++ b/src/main/java/org/ohdsi/webapi/source/Source.java @@ -117,12 +117,14 @@ public String getTableQualifier(DaimonType daimonType) { } public String getTableQualifierOrNull(DaimonType daimonType) { - for (SourceDaimon sourceDaimon : this.getDaimons()) { - if (sourceDaimon.getDaimonType() == daimonType) { - return sourceDaimon.getTableQualifier(); + if (this.getDaimons() != null){ + for (SourceDaimon sourceDaimon : this.getDaimons()) { + if (sourceDaimon.getDaimonType() == daimonType) { + return 
sourceDaimon.getTableQualifier(); + } } } - return null; + return null; } public String getSourceKey() { diff --git a/src/test/java/org/ohdsi/webapi/util/DataSourceDTOParserTest.java b/src/test/java/org/ohdsi/webapi/util/DataSourceDTOParserTest.java index b78547e209..40fa1b3a70 100644 --- a/src/test/java/org/ohdsi/webapi/util/DataSourceDTOParserTest.java +++ b/src/test/java/org/ohdsi/webapi/util/DataSourceDTOParserTest.java @@ -28,6 +28,7 @@ public class DataSourceDTOParserTest { "OAuthPvtKeyPath=C:\\SecureFiles\\ServiceKeyFile.p12;"; public static final String ORACLE_WO_PWD_CONN_STR = "jdbc:oracle:thin:@myhost:1521:orcl"; public static final String ORACLE_WITH_PWD_CONN_STR = "jdbc:oracle:thin:scott/tiger@myhost:1521:orcl"; + public static final String HIVE_CONN_STR = "jdbc:hive2://sandbox-hdp.hortonworks.com:2181/synpuf_531_orc;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2"; @Test public void parseDTO() { @@ -103,6 +104,12 @@ public void parseDTO() { assertThat(dto.getConnectionString(), is(ORACLE_WITH_PWD_CONN_STR)); assertThat(dto.getUsername(), is("scott")); assertThat(dto.getPassword(), is("tiger")); + + dto = DataSourceDTOParser.parseDTO(getHiveSource()); + assertThat(dto.getType(), is(DBMSType.HIVE)); + assertThat(dto.getConnectionString(), is(HIVE_CONN_STR)); + assertThat(dto.getUsername(), is(nullValue())); + assertThat(dto.getPassword(), is(nullValue())); } private Source getPostgreSQLPasswordSource() { @@ -190,4 +197,11 @@ private Source getPostgreSQLSource() { source.setSourceConnection(PGSQL_CONN_STR); return source; } + + private Source getHiveSource() { + Source source = new Source(); + source.setSourceDialect("hive"); + source.setSourceConnection(HIVE_CONN_STR); + return source; + } } \ No newline at end of file From bbc6a3fb1cb86d2f1756513d27696554e6791a0f Mon Sep 17 00:00:00 2001 From: Anastasiia Klochkova Date: Fri, 14 Jun 2019 13:31:58 +0200 Subject: [PATCH 02/15] added hive support --- pom.xml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 87bfe3d4bd..60e6e7c896 100644 --- a/pom.xml +++ b/pom.xml @@ -1170,7 +1170,7 @@ webapi-hive true - + ${basedir}/src/main/extras/hive From f817341f82dcee3e9daef101177238a86ff05e88 Mon Sep 17 00:00:00 2001 From: Anastasiia Klochkova Date: Fri, 14 Jun 2019 13:32:23 +0200 Subject: [PATCH 03/15] added hive support --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 60e6e7c896..87bfe3d4bd 100644 --- a/pom.xml +++ b/pom.xml @@ -1170,7 +1170,7 @@ webapi-hive true - + ${basedir}/src/main/extras/hive From a6f2eff355e35e2ddeee8ff3c3563ed949ba1f10 Mon Sep 17 00:00:00 2001 From: Vitaly Koulakov Date: Tue, 25 Jun 2019 14:25:55 +0300 Subject: [PATCH 04/15] hive support: result schema --- pom.xml | 2 +- .../org/ohdsi/webapi/service/DDLService.java | 28 ++- .../results/init_hive_heracles_analysis.sql | 215 ++++++++++++++++++ 3 files changed, 235 insertions(+), 10 deletions(-) create mode 100644 src/main/resources/ddl/results/init_hive_heracles_analysis.sql diff --git a/pom.xml b/pom.xml index 87bfe3d4bd..1ee158c6ee 100644 --- a/pom.xml +++ b/pom.xml @@ -139,7 +139,7 @@ - 1.14.0-SNAPSHOT + 1.15.0-SNAPSHOT 2.25.1 600000 10000 diff --git a/src/main/java/org/ohdsi/webapi/service/DDLService.java b/src/main/java/org/ohdsi/webapi/service/DDLService.java index 869087200d..8c203a8164 100644 --- a/src/main/java/org/ohdsi/webapi/service/DDLService.java +++ b/src/main/java/org/ohdsi/webapi/service/DDLService.java @@ -20,21 +20,18 @@ import static org.ohdsi.webapi.service.SqlRenderService.translateSQL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; +import java.util.*; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; + +import com.odysseusinc.arachne.commons.types.DBMSType; import 
org.apache.commons.lang3.ObjectUtils; import org.ohdsi.circe.helper.ResourceHelper; import org.ohdsi.webapi.sqlrender.SourceStatement; import org.ohdsi.webapi.sqlrender.TranslatedStatement; -import org.ohdsi.webapi.util.SessionUtils; import org.springframework.stereotype.Component; @Path("/ddl/") @@ -76,9 +73,14 @@ public class DDLService { "/ddl/results/pathway_analysis_stats.sql" ); + private static final String INIT_HERACLES_PERIODS = "/ddl/results/init_heracles_periods.sql"; + public static final Collection RESULT_INIT_FILE_PATHS = Arrays.asList( - "/ddl/results/init_heracles_analysis.sql", - "/ddl/results/init_heracles_periods.sql" + "/ddl/results/init_heracles_analysis.sql", INIT_HERACLES_PERIODS + ); + + public static final Collection HIVE_RESULT_INIT_FILE_PATHS = Arrays.asList( + "/ddl/results/init_hive_heracles_analysis.sql", INIT_HERACLES_PERIODS ); public static final Collection INIT_CONCEPT_HIERARCHY_FILE_PATHS = Arrays.asList( @@ -123,7 +125,15 @@ public String generateResultSQL( put(TEMP_SCHEMA, oracleTempSchema); }}; - return generateSQL(dialect, params, resultDDLFilePaths, RESULT_INIT_FILE_PATHS, RESULT_INDEX_FILE_PATHS); + return generateSQL(dialect, params, resultDDLFilePaths, getResultInitFilePaths(dialect), RESULT_INDEX_FILE_PATHS); + } + + private Collection getResultInitFilePaths(String dialect) { + if (Objects.equals(DBMSType.HIVE.getOhdsiDB(), dialect)) { + return HIVE_RESULT_INIT_FILE_PATHS; + } else { + return RESULT_INIT_FILE_PATHS; + } } @GET diff --git a/src/main/resources/ddl/results/init_hive_heracles_analysis.sql b/src/main/resources/ddl/results/init_hive_heracles_analysis.sql new file mode 100644 index 0000000000..604122f41a --- /dev/null +++ b/src/main/resources/ddl/results/init_hive_heracles_analysis.sql @@ -0,0 +1,215 @@ +TRUNCATE TABLE @results_schema.heracles_analysis; + +insert into table @results_schema.heracles_analysis 
(analysis_id,analysis_name,stratum_1_name,stratum_2_name,stratum_3_name,stratum_4_name,stratum_5_name,analysis_type) +values + (0, CAST('Source name' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PERSON' as VARCHAR(255))), + (1, CAST('Number of persons' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PERSON' as VARCHAR(255))), + (2, CAST('Number of persons by gender' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PERSON' as VARCHAR(255))), + (3, CAST('Number of persons by year of birth' as VARCHAR(255)), CAST('year_of_birth' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PERSON' as VARCHAR(255))), + (4, CAST('Number of persons by race' as VARCHAR(255)), CAST('race_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PERSON' as VARCHAR(255))), + (5, CAST('Number of persons by ethnicity' as VARCHAR(255)), CAST('ethnicity_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PERSON' as VARCHAR(255))), + (7, CAST('Number of persons with invalid provider_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PERSON' as VARCHAR(255))), + (8, CAST('Number of persons with invalid location_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL 
AS VARCHAR(255)), CAST('PERSON' as VARCHAR(255))), + (9, CAST('Number of persons with invalid care_site_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PERSON' as VARCHAR(255))), + (101, CAST('Number of persons by age, with age at first observation period' as VARCHAR(255)), CAST('age' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (102, CAST('Number of persons by gender by age, with age at first observation period' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (103, CAST('Distribution of age at first observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (104, CAST('Distribution of age at first observation period by gender' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (105, CAST('Length of observation (days) of first observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (106, CAST('Length of observation (days) of first observation period by gender' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (107, CAST('Length 
of observation (days) of first observation period by age decile' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (108, CAST('Number of persons by length of first observation period, in 30d increments' as VARCHAR(255)), CAST('Observation period length 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (109, CAST('Number of persons with continuous observation in each year' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (110, CAST('Number of persons with continuous observation in each month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (111, CAST('Number of persons by observation period start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (112, CAST('Number of persons by observation period end month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (113, CAST('Number of persons by number of observation periods' as VARCHAR(255)), CAST('number of observation periods' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + 
(114, CAST('Number of persons with observation period before year-of-birth' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (115, CAST('Number of persons with observation period end < observation period start' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (116, CAST('Number of persons with at least one day of observation in each year by gender and age decile' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (117, CAST('Number of persons with at least one day of observation in each month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (200, CAST('Number of persons with at least one visit occurrence, by visit_concept_id' as VARCHAR(255)), CAST('visit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (201, CAST('Number of visit occurrence records, by visit_concept_id' as VARCHAR(255)), CAST('visit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (202, CAST('Number of persons by visit occurrence start month, by visit_concept_id' as VARCHAR(255)), CAST('visit_concept_id' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), 
CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (203, CAST('Number of distinct visit occurrence concepts per person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (204, CAST('Number of persons with at least one visit occurrence, by visit_concept_id by calendar year by gender by age decile' as VARCHAR(255)), CAST('visit_concept_id' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (206, CAST('Distribution of age by visit_concept_id' as VARCHAR(255)), CAST('visit_concept_id' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (207, CAST('Number of visit records with invalid person_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (208, CAST('Number of visit records outside valid observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (209, CAST('Number of visit records with end date < start date' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (210, CAST('Number of visit records with invalid care_site_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + 
(211, CAST('Distribution of length of stay by visit_concept_id' as VARCHAR(255)), CAST('visit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (220, CAST('Number of visit occurrence records by visit occurrence start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('VISITS' as VARCHAR(255))), + (400, CAST('Number of persons with at least one condition occurrence, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (401, CAST('Number of condition occurrence records, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (402, CAST('Number of persons by condition occurrence start month, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (403, CAST('Number of distinct condition occurrence concepts per person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (404, CAST('Number of persons with at least one condition occurrence, by condition_concept_id by calendar year by gender by age decile' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), 
CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (405, CAST('Number of condition occurrence records, by condition_concept_id by condition_type_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('condition_type_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (406, CAST('Distribution of age by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (409, CAST('Number of condition occurrence records with invalid person_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (410, CAST('Number of condition occurrence records outside valid observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (411, CAST('Number of condition occurrence records with end date < start date' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (412, CAST('Number of condition occurrence records with invalid provider_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (413, CAST('Number of condition occurrence records with invalid visit_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), 
CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (420, CAST('Number of condition occurrence records by condition occurrence start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION' as VARCHAR(255))), + (500, CAST('Number of persons with death, by cause_of_death_concept_id' as VARCHAR(255)), CAST('cause_of_death_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (501, CAST('Number of records of death, by cause_of_death_concept_id' as VARCHAR(255)), CAST('cause_of_death_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (502, CAST('Number of persons by death month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (504, CAST('Number of persons with a death, by calendar year by gender by age decile' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (505, CAST('Number of death records, by death_type_concept_id' as VARCHAR(255)), CAST('death_type_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (506, CAST('Distribution of age at death by gender' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), 
CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (509, CAST('Number of death records with invalid person_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (510, CAST('Number of death records outside valid observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (511, CAST('Distribution of time from death to last condition' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (512, CAST('Distribution of time from death to last drug' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (513, CAST('Distribution of time from death to last visit' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (514, CAST('Distribution of time from death to last procedure' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (515, CAST('Distribution of time from death to last observation' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DEATH' as VARCHAR(255))), + (600, CAST('Number of persons with at least one procedure occurrence, by procedure_concept_id' as 
VARCHAR(255)), CAST('procedure_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (601, CAST('Number of procedure occurrence records, by procedure_concept_id' as VARCHAR(255)), CAST('procedure_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (602, CAST('Number of persons by procedure occurrence start month, by procedure_concept_id' as VARCHAR(255)), CAST('procedure_concept_id' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (603, CAST('Number of distinct procedure occurrence concepts per person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (604, CAST('Number of persons with at least one procedure occurrence, by procedure_concept_id by calendar year by gender by age decile' as VARCHAR(255)), CAST('procedure_concept_id' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (605, CAST('Number of procedure occurrence records, by procedure_concept_id by procedure_type_concept_id' as VARCHAR(255)), CAST('procedure_concept_id' as VARCHAR(255)), CAST('procedure_type_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (606, CAST('Distribution of age by procedure_concept_id' as VARCHAR(255)), CAST('procedure_concept_id' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS 
VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (609, CAST('Number of procedure occurrence records with invalid person_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (610, CAST('Number of procedure occurrence records outside valid observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (612, CAST('Number of procedure occurrence records with invalid provider_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (613, CAST('Number of procedure occurrence records with invalid visit_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (620, CAST('Number of procedure occurrence records by procedure occurrence start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('PROCEDURE' as VARCHAR(255))), + (700, CAST('Number of persons with at least one drug exposure, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (701, CAST('Number of drug exposure records, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), 
CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (702, CAST('Number of persons by drug exposure start month, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (703, CAST('Number of distinct drug exposure concepts per person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (704, CAST('Number of persons with at least one drug exposure, by drug_concept_id by calendar year by gender by age decile' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (705, CAST('Number of drug exposure records, by drug_concept_id by drug_type_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('drug_type_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (706, CAST('Distribution of age by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (709, CAST('Number of drug exposure records with invalid person_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (710, CAST('Number of drug exposure records outside valid observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), 
CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (711, CAST('Number of drug exposure records with end date < start date' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (712, CAST('Number of drug exposure records with invalid provider_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (713, CAST('Number of drug exposure records with invalid visit_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (715, CAST('Distribution of days_supply by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (716, CAST('Distribution of refills by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (717, CAST('Distribution of quantity by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (720, CAST('Number of drug exposure records by drug exposure start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG' as VARCHAR(255))), + (800, CAST('Number of persons with at least one observation 
occurrence, by observation_concept_id' as VARCHAR(255)), CAST('observation_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (801, CAST('Number of observation occurrence records, by observation_concept_id' as VARCHAR(255)), CAST('observation_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (802, CAST('Number of persons by observation occurrence start month, by observation_concept_id' as VARCHAR(255)), CAST('observation_concept_id' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (803, CAST('Number of distinct observation occurrence concepts per person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (804, CAST('Number of persons with at least one observation occurrence, by observation_concept_id by calendar year by gender by age decile' as VARCHAR(255)), CAST('observation_concept_id' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (805, CAST('Number of observation occurrence records, by observation_concept_id by observation_type_concept_id' as VARCHAR(255)), CAST('observation_concept_id' as VARCHAR(255)), CAST('observation_type_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (806, CAST('Distribution of age by observation_concept_id' as VARCHAR(255)), 
CAST('observation_concept_id' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (807, CAST('Number of observation occurrence records, by observation_concept_id and unit_concept_id' as VARCHAR(255)), CAST('observation_concept_id' as VARCHAR(255)), CAST('unit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (809, CAST('Number of observation records with invalid person_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (810, CAST('Number of observation records outside valid observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (812, CAST('Number of observation records with invalid provider_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (813, CAST('Number of observation records with invalid visit_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (814, CAST('Number of observation records with no value (numeric, string, or concept)' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (815, CAST('Distribution of numeric values, by observation_concept_id and unit_concept_id' as 
VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (816, CAST('Distribution of low range, by observation_concept_id and unit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (817, CAST('Distribution of high range, by observation_concept_id and unit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (818, CAST('Number of observation records below/within/above normal range, by observation_concept_id and unit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (820, CAST('Number of observation records by observation start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('OBSERVATION' as VARCHAR(255))), + (900, CAST('Number of persons with at least one drug era, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (901, CAST('Number of drug era records, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (902, CAST('Number of persons by drug era start month, by drug_concept_id' as 
VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (903, CAST('Number of distinct drug era concepts per person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (904, CAST('Number of persons with at least one drug era, by drug_concept_id by calendar year by gender by age decile' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (906, CAST('Distribution of age by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (907, CAST('Distribution of drug era length, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (908, CAST('Number of drug eras without valid person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (909, CAST('Number of drug eras outside valid observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (910, CAST('Number of drug eras with end date < start date' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), 
CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (920, CAST('Number of drug era records by drug era start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('DRUG_ERA' as VARCHAR(255))), + (1000, CAST('Number of persons with at least one condition era, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1001, CAST('Number of condition era records, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1002, CAST('Number of persons by condition era start month, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1003, CAST('Number of distinct condition era concepts per person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1004, CAST('Number of persons with at least one condition era, by condition_concept_id by calendar year by gender by age decile' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1006, 
CAST('Distribution of age by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1007, CAST('Distribution of condition era length, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1008, CAST('Number of condition eras without valid person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1009, CAST('Number of condition eras outside valid observation period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1010, CAST('Number of condition eras with end date < start date' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1020, CAST('Number of condition era records by condition era start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CONDITION_ERA' as VARCHAR(255))), + (1100, CAST('Number of persons by location 3-digit zip' as VARCHAR(255)), CAST('3-digit zip' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('LOCATION' as VARCHAR(255))), + (1101, CAST('Number of persons by location state' as VARCHAR(255)), 
CAST('state' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('LOCATION' as VARCHAR(255))), + (1200, CAST('Number of persons by place of service' as VARCHAR(255)), CAST('place_of_service_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CARE_SITE' as VARCHAR(255))), + (1201, CAST('Number of visits by place of service' as VARCHAR(255)), CAST('place_of_service_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('CARE_SITE' as VARCHAR(255))), + (1300, CAST('Number of persons with at least one measurement occurrence, by measurement_concept_id' as VARCHAR(255)), CAST('measurement_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1301, CAST('Number of measurement occurrence records, by measurement_concept_id' as VARCHAR(255)), CAST('measurement_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1302, CAST('Number of persons by measurement occurrence start month, by measurement_concept_id' as VARCHAR(255)), CAST('measurement_concept_id' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1303, CAST('Number of distinct measurement occurrence concepts per person' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1304, CAST('Number of persons with at least one 
measurement occurrence, by measurement_concept_id by calendar year by gender by age decile' as VARCHAR(255)), CAST('measurement_concept_id' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1305, CAST('Number of measurement occurrence records, by measurement_concept_id by measurement_type_concept_id' as VARCHAR(255)), CAST('measurement_concept_id' as VARCHAR(255)), CAST('measurement_type_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1306, CAST('Distribution of age by measurement_concept_id' as VARCHAR(255)), CAST('measurement_concept_id' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1307, CAST('Number of measurement occurrence records, by measurement_concept_id and unit_concept_id' as VARCHAR(255)), CAST('measurement_concept_id' as VARCHAR(255)), CAST('unit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1309, CAST('Number of measurement records with invalid person_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1310, CAST('Number of measurement records outside valid measurement period' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1312, CAST('Number of measurement records with invalid provider_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS 
VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1313, CAST('Number of measurement records with invalid visit_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1314, CAST('Number of measurement records with no value (numeric, string, or concept)' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1315, CAST('Distribution of numeric values, by measurement_concept_id and unit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1316, CAST('Distribution of low range, by measurement_concept_id and unit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1317, CAST('Distribution of high range, by measurement_concept_id and unit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1318, CAST('Number of measurement records below/within/above normal range, by measurement_concept_id and unit_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1320, CAST('Number of measurement records by measurement start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), 
CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('MEASUREMENT' as VARCHAR(255))), + (1700, CAST('Number of records by cohort_definition_id' as VARCHAR(255)), CAST('cohort_definition_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT' as VARCHAR(255))), + (1701, CAST('Number of records with cohort end date < cohort start date' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT' as VARCHAR(255))), + (1800, CAST('Number of persons by age, with age at cohort start' as VARCHAR(255)), CAST('age' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1801, CAST('Distribution of age at cohort start' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1802, CAST('Distribution of age at cohort start by gender' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1803, CAST('Distribution of age at cohort start by cohort start year' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1804, CAST('Number of persons by duration from cohort start to cohort end, in 30d increments' as VARCHAR(255)), CAST('Cohort period length 30d increments' as VARCHAR(255)), CAST(NULL AS 
VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1805, CAST('Number of persons by duration from observation start to cohort start, in 30d increments' as VARCHAR(255)), CAST('Baseline period length 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1806, CAST('Number of persons by duration from cohort start to observation end, in 30d increments' as VARCHAR(255)), CAST('Follow-up period length 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1807, CAST('Number of persons by duration from cohort end to observation end, in 30d increments' as VARCHAR(255)), CAST('Post-cohort period length 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1808, CAST('Distribution of duration (days) from cohort start to cohort end' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1809, CAST('Distribution of duration (days) from cohort start to cohort end, by gender' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)),CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1810, CAST('Distribution of duration (days) from cohort start to cohort end, by age decile' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), 
CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1811, CAST('Distribution of duration (days) from observation start to cohort start' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1812, CAST('Distribution of duration (days) from cohort start to observation end' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1813, CAST('Distribution of duration (days) from cohort end to observation end' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1814, CAST('Number of persons by cohort start year by gender by age decile' as VARCHAR(255)), CAST('calendar year' as VARCHAR(255)), CAST('gender_concept_id' as VARCHAR(255)), CAST('age decile' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1815, CAST('Number of persons by cohort start month' as VARCHAR(255)), CAST('calendar month' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1816, CAST('Number of persons by number of cohort periods' as VARCHAR(255)), CAST('number of cohort periods' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1820, CAST('Number of persons by duration from cohort start to first occurrence of condition occurrence, by 
condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1821, CAST('Number of events by duration from cohort start to all occurrences of condition occurrence, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1830, CAST('Number of persons by duration from cohort start to first occurrence of procedure occurrence, by procedure_concept_id' as VARCHAR(255)), CAST('procedure_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1831, CAST('Number of events by duration from cohort start to all occurrences of procedure occurrence, by procedure_concept_id' as VARCHAR(255)), CAST('procedure_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1840, CAST('Number of persons by duration from cohort start to first occurrence of drug exposure, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1841, CAST('Number of events by duration from cohort start to all occurrences of drug exposure, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('time-to-event 30d 
increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1850, CAST('Number of persons by duration from cohort start to first occurrence of observation, by observation_concept_id' as VARCHAR(255)), CAST('observation_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1851, CAST('Number of events by duration from cohort start to all occurrences of observation, by observation_concept_id' as VARCHAR(255)), CAST('observation_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1860, CAST('Number of persons by duration from cohort start to first occurrence of condition era, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1861, CAST('Number of events by duration from cohort start to all occurrences of condition era, by condition_concept_id' as VARCHAR(255)), CAST('condition_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1870, CAST('Number of persons by duration from cohort start to first occurrence of drug era, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), 
CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (1871, CAST('Number of events by duration from cohort start to all occurrences of drug era, by drug_concept_id' as VARCHAR(255)), CAST('drug_concept_id' as VARCHAR(255)), CAST('time-to-event 30d increments' as VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST(NULL AS VARCHAR(255)), CAST('COHORT_SPECIFIC_ANALYSES' as VARCHAR(255))), + (4000, CAST('Distribution of observation period days by period_id in the 365 days prior to first cohort_start_date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL,CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4001, CAST('Number of subjects with visits by period_id, by visit_concept_id, by visit_type_concept_id in the 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4002, CAST('Distribution of number of visit occurrence records per subject by period_id, by visit_concept_id, by visit_type_concept_id in 365d prior to cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4003, CAST('Distribution of number of visit dates per subject by period_id, by visit_concept_id, by visit_type_concept_id in 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4003, CAST('Distribution of number of visit dates per subject by period_id, by visit_concept_id, by visit_type_concept_id in 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4004, CAST('Distribution of number of care_site+visit dates per subject by period_id, by visit_concept_id, by visit_type_concept_id in 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4005, CAST('Distribution of length of stay for 
inpatient visits per subject by period_id, by visit_concept_id, by visit_type_concept_id in the 365 days prior to first cohort_start_date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4006, CAST('Distribution of observation period days per subject, by period_id during cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4007, CAST('Number of subjects with visits by period_id, by visit_concept_id, by visit_type_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4008, CAST('Distribution of number of visit occurrence records per subject by period_id, by visit_concept_id, by visit_type_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4009, CAST('Distribution of number of visit dates per subject by period_id, by visit_concept_id, by visit_type_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4010, CAST('Distribution of number of care_site+visit dates per subject by period_id, by visit_concept_id, by visit_type_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4011, CAST('Distribution of length of stay for inpatient visits per subject by period_id, by visit_concept_id, by visit_type_concept_id during cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4012, CAST('Number of subjects with Drug Exposure by period_id, by drug_concept_id, by drug_type_concept_id in the 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4013, CAST('Distribution of number of Drug Exposure 
records per subject, by period_id, by drug_concept_id in 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4014, CAST('Distribution of greater than 0 drug day supply per subject by period_id, by drug_concept_id in the 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4015, CAST('Distribution of greater than 0 drug quantity per subject by period_id, by drug_concept_id in the 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4016, CAST('Number of subjects with Drug Exposure by period_id, by drug_concept_id, by drug_type_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4017, CAST('Distribution of number of Drug Exposure records per subject, by period_id, by drug_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4018, CAST('Distribution of greater than 0 drug day supply per subject by period_id, by drug_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4019, CAST('Distribution of greater than 0 drug quantity per subject by period_id, by drug_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4020, CAST('Distribution of greater than 0 US$ cost per subject by period_id, by visit_concept_id, by visit_type_concept_id, by cost_concept_id, by cost_type_concept_id in the 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4021, CAST('Distribution of greater than 0 US$ cost per subject by 
period_id, by visit_concept_id, by visit_type_concept_id, by cost_concept_id, by cost_type_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4022, CAST('Distribution of greater than 0 US$ cost per subject by period_id, by drug_concept_id, by drug_type_concept_id, by cost_concept_id, by cost_type_concept_id in the 365d prior to first cohort start date' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))), + (4023, CAST('Distribution of greater than 0 US$ cost per subject by period_id, by drug_concept_id, by drug_type_concept_id, by cost_concept_id, by cost_type_concept_id, by cost_type_concept_id during the cohort period' as VARCHAR(255)), NULL, NULL, NULL, NULL, NULL, CAST('HEALTHCARE_UTILIZATION' as VARCHAR(255))) +; From da9d793f42abbc2594f40b7a11456c40f8b3fffe Mon Sep 17 00:00:00 2001 From: Anastasiia Klochkova Date: Mon, 26 Aug 2019 16:42:30 +0200 Subject: [PATCH 05/15] adjusted webapi for running all types of analyses --- pom.xml | 46 +++++-------------- .../DropCohortTableListener.java | 10 +++- ...GenerateCohortCharacterizationTasklet.java | 2 +- .../common/generation/GenerationUtils.java | 4 ++ .../webapi/pathway/PathwayServiceImpl.java | 7 ++- .../resources/pathway/runPathwayAnalysis.sql | 3 ++ 6 files changed, 32 insertions(+), 40 deletions(-) diff --git a/pom.xml b/pom.xml index 005567b407..1151c7601b 100644 --- a/pom.xml +++ b/pom.xml @@ -1191,43 +1191,19 @@ webapi-hive - - true - - ${basedir}/src/main/extras/hive - - - org.apache.hive - hive-jdbc - 3.1.0.3.0.1.0-187 - + + org.apache.hive + hive-jdbc + 3.1.0.3.0.1.0-187 + + + org.eclipse.jetty + * + + + - - - - org.apache.maven.plugins - maven-install-plugin - 2.5.2 - - - hive-jdbc - initialize - - install-file - - - org.apache.hive - hive-jdbc - 3.1.0.3.0.1.0-187 - jar - ${hive.classpath}/hive-jdbc-standalone.jar - - - - - - diff --git 
a/src/main/java/org/ohdsi/webapi/cohortcharacterization/DropCohortTableListener.java b/src/main/java/org/ohdsi/webapi/cohortcharacterization/DropCohortTableListener.java index ae2ce920fd..0c4d0280bf 100644 --- a/src/main/java/org/ohdsi/webapi/cohortcharacterization/DropCohortTableListener.java +++ b/src/main/java/org/ohdsi/webapi/cohortcharacterization/DropCohortTableListener.java @@ -1,5 +1,8 @@ package org.ohdsi.webapi.cohortcharacterization; +import com.odysseusinc.arachne.commons.types.DBMSType; +import java.util.Objects; +import org.apache.commons.lang3.StringUtils; import org.ohdsi.circe.helper.ResourceHelper; import org.ohdsi.sql.SqlTranslate; import org.ohdsi.webapi.source.SourceService; @@ -44,8 +47,11 @@ private Object doTask(JobParameters parameters) { final Source source = sourceService.findBySourceId(sourceId); final String resultsQualifier = SourceUtils.getResultsQualifier(source); final String tempQualifier = SourceUtils.getTempQualifier(source, resultsQualifier); - jdbcTemplate.execute(SqlTranslate.translateSql(sql, source.getSourceDialect(), null, tempQualifier)); - + String toRemove = SqlTranslate.translateSql(sql, source.getSourceDialect(), null, tempQualifier); + if (Objects.equals(DBMSType.HIVE.getOhdsiDB(), source.getSourceDialect())){ + toRemove = StringUtils.remove(toRemove, ';'); + } + jdbcTemplate.execute(toRemove); return null; } diff --git a/src/main/java/org/ohdsi/webapi/cohortcharacterization/GenerateCohortCharacterizationTasklet.java b/src/main/java/org/ohdsi/webapi/cohortcharacterization/GenerateCohortCharacterizationTasklet.java index 3aa3e90410..2cba14008e 100644 --- a/src/main/java/org/ohdsi/webapi/cohortcharacterization/GenerateCohortCharacterizationTasklet.java +++ b/src/main/java/org/ohdsi/webapi/cohortcharacterization/GenerateCohortCharacterizationTasklet.java @@ -91,7 +91,7 @@ protected String[] prepareQueries(ChunkContext chunkContext, CancelableJdbcTempl * * Therefore, there are two ways: * - either precisely group SQLs 
into statements so that temp tables aren't re-used in a single statement, - * - or use ‘permenant temporary tables’ + * - or use ‘permanent temporary tables’ * * The second option looks better since such SQL could be exported and executed manually, * which is not the case with the first option. diff --git a/src/main/java/org/ohdsi/webapi/common/generation/GenerationUtils.java b/src/main/java/org/ohdsi/webapi/common/generation/GenerationUtils.java index cd8e944d37..7a0015b9db 100644 --- a/src/main/java/org/ohdsi/webapi/common/generation/GenerationUtils.java +++ b/src/main/java/org/ohdsi/webapi/common/generation/GenerationUtils.java @@ -165,10 +165,14 @@ public SimpleJobBuilder buildJobForExecutionEngineBasedAnalysisTasklet(String an .tasklet(callbackTasklet) .build(); + DropCohortTableListener dropCohortTableListener = new DropCohortTableListener(getSourceJdbcTemplate(source), + transactionTemplate, sourceService, sourceAwareSqlRender); + return jobBuilders.get(analysisTypeName) .start(createAnalysisExecutionStep) .next(runExecutionStep) .next(waitCallbackStep) + .listener(dropCohortTableListener) .listener(new AutoremoveJobListener(jobService)); } } diff --git a/src/main/java/org/ohdsi/webapi/pathway/PathwayServiceImpl.java b/src/main/java/org/ohdsi/webapi/pathway/PathwayServiceImpl.java index f7584e9d1c..38f63f8523 100644 --- a/src/main/java/org/ohdsi/webapi/pathway/PathwayServiceImpl.java +++ b/src/main/java/org/ohdsi/webapi/pathway/PathwayServiceImpl.java @@ -2,6 +2,7 @@ import com.cosium.spring.data.jpa.entity.graph.domain.EntityGraph; import com.google.common.base.MoreObjects; +import com.odysseusinc.arachne.commons.types.DBMSType; import org.hibernate.Hibernate; import org.ohdsi.circe.helper.ResourceHelper; import org.ohdsi.sql.SqlRender; @@ -324,7 +325,8 @@ public String buildAnalysisSql(Long generationId, PathwayAnalysisEntity pathwayA "pathway_target_cohort_id", "max_depth", "combo_window", - "allow_repeats" + "allow_repeats", + "isHive" }; String[] values 
= new String[]{ generationId.toString(), @@ -335,7 +337,8 @@ public String buildAnalysisSql(Long generationId, PathwayAnalysisEntity pathwayA tc.getCohortDefinition().getId().toString(), pathwayAnalysis.getMaxDepth().toString(), MoreObjects.firstNonNull(pathwayAnalysis.getCombinationWindow(), 1).toString(), - String.valueOf(pathwayAnalysis.isAllowRepeats()) + String.valueOf(pathwayAnalysis.isAllowRepeats()), + String.valueOf(Objects.equals(DBMSType.HIVE.getOhdsiDB(), source.getSourceDialect())) }; String renderedSql = SqlRender.renderSql(analysisSql, params, values); diff --git a/src/main/resources/resources/pathway/runPathwayAnalysis.sql b/src/main/resources/resources/pathway/runPathwayAnalysis.sql index 04b4df6513..26b79a9ee4 100644 --- a/src/main/resources/resources/pathway/runPathwayAnalysis.sql +++ b/src/main/resources/resources/pathway/runPathwayAnalysis.sql @@ -196,3 +196,6 @@ DROP TABLE #date_replacements; TRUNCATE TABLE #raw_events; DROP TABLE #raw_events; + +{@isHive == 'true'}?{DROP TABLE #person_dates; DROP TABLE #marked_dates; DROP TABLE #grouped_dates; DROP TABLE #replacements; +DROP TABLE #cohort_dates; DROP TABLE #time_periods; DROP TABLE #events;} \ No newline at end of file From 350e6e8ffacc66ad74cace65014ea8b6a9f8f459 Mon Sep 17 00:00:00 2001 From: ymolodkov Date: Tue, 27 Aug 2019 13:16:38 +0200 Subject: [PATCH 06/15] upgrade zip4j dependency dependency itself was updated in the common library, here I just fixed code. 
--- ...sResultFileContentSensitiveInfoServiceImpl.java | 14 ++++++-------- .../service/ScriptExecutionServiceImpl.java | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/ohdsi/webapi/executionengine/service/AnalysisResultFileContentSensitiveInfoServiceImpl.java b/src/main/java/org/ohdsi/webapi/executionengine/service/AnalysisResultFileContentSensitiveInfoServiceImpl.java index 97a0d32894..48e2f3cf14 100644 --- a/src/main/java/org/ohdsi/webapi/executionengine/service/AnalysisResultFileContentSensitiveInfoServiceImpl.java +++ b/src/main/java/org/ohdsi/webapi/executionengine/service/AnalysisResultFileContentSensitiveInfoServiceImpl.java @@ -1,7 +1,7 @@ package org.ohdsi.webapi.executionengine.service; import com.odysseusinc.arachne.execution_engine_common.util.CommonFileUtils; -import net.lingala.zip4j.core.ZipFile; +import net.lingala.zip4j.ZipFile; import net.lingala.zip4j.exception.ZipException; import org.apache.commons.collections.map.HashedMap; import org.apache.commons.io.FileUtils; @@ -156,11 +156,9 @@ private void processArchive(Path zipPath, Map variables) { // Delete archive volumes ZipFile zipFile = new ZipFile(zipPath.toFile()); - List filenames = zipFile.getSplitZipFiles(); - filenames.forEach(filename -> { - File file = new File(filename); - file.delete(); - }); + zipFile + .getSplitZipFiles() + .forEach(File::delete); Files.list(temporaryDir.toPath()).forEach(path -> { try { @@ -170,10 +168,10 @@ private void processArchive(Path zipPath, Map variables) { } }); CommonFileUtils.compressAndSplit(temporaryDir, zipPath.toFile(), null); - } catch (IOException e) { - LOGGER.error("File writing error", e); } catch (ZipException e) { LOGGER.error("Error unzipping file", e); + } catch (IOException e) { + LOGGER.error("File writing error", e); } finally { FileUtils.deleteQuietly(temporaryDir); } diff --git a/src/main/java/org/ohdsi/webapi/executionengine/service/ScriptExecutionServiceImpl.java 
b/src/main/java/org/ohdsi/webapi/executionengine/service/ScriptExecutionServiceImpl.java index 2603ea57bd..9783ca6e5f 100644 --- a/src/main/java/org/ohdsi/webapi/executionengine/service/ScriptExecutionServiceImpl.java +++ b/src/main/java/org/ohdsi/webapi/executionengine/service/ScriptExecutionServiceImpl.java @@ -145,7 +145,7 @@ public void runScript(Long executionId, Source source, List files, } finally { FileUtils.deleteQuietly(tempDir); } - }catch (ZipException | IOException e) { + }catch (IOException e) { log.error("Failed to compress request files", e); throw new InternalServerErrorException(e); } From 2498b539ee02d4e18887dccfe4220cd84659cb75 Mon Sep 17 00:00:00 2001 From: Vitaly Koulakov Date: Thu, 29 Aug 2019 18:55:18 +0300 Subject: [PATCH 07/15] changes to circe 1.8.1-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1151c7601b..efba43d1cb 100644 --- a/pom.xml +++ b/pom.xml @@ -629,7 +629,7 @@ org.ohdsi circe - 1.8.1 + 1.8.1-SNAPSHOT org.ohdsi From a9d58ca0b5ff4d510c2eb085c72a62ae671ff198 Mon Sep 17 00:00:00 2001 From: ymolodkov Date: Mon, 9 Sep 2019 11:34:33 +0200 Subject: [PATCH 08/15] issue-1168 change hive driver version --- pom.xml | 48 +++++++++++++------ .../DropCohortTableListener.java | 2 +- ...ltFileContentSensitiveInfoServiceImpl.java | 39 ++++++++------- .../service/ScriptExecutionServiceImpl.java | 2 +- 4 files changed, 57 insertions(+), 34 deletions(-) diff --git a/pom.xml b/pom.xml index 1151c7601b..5e27a8d110 100644 --- a/pom.xml +++ b/pom.xml @@ -185,6 +185,8 @@ classpath:config/ehcache.xml + 1.6.3-SNAPSHOT + 3.1.0 WebAPI @@ -438,11 +440,13 @@ org.springframework.boot spring-boot-starter-web + org.springframework.boot spring-boot-starter-tomcat provided + org.springframework.boot spring-boot-starter-batch @@ -490,7 +494,7 @@ org.ohdsi.sql SqlRender - 1.6.3-SNAPSHOT + ${SqlRender.version} commons-dbutils @@ -629,7 +633,7 @@ org.ohdsi circe - 1.8.1 + 1.8.2-SNAPSHOT org.ohdsi @@
-899,7 +903,7 @@ true - ...path/to/impala/jdbc/drivers... + ${basedir}/src/main/extras/impala @@ -1192,17 +1196,33 @@ webapi-hive - - org.apache.hive - hive-jdbc - 3.1.0.3.0.1.0-187 - - - org.eclipse.jetty - * - - - + + org.apache.hive + hive-jdbc + ${hive-jdbc.version} + + + org.apache.tomcat + * + + + org.eclipse.jetty + * + + + slf4j-log4j12 + org.slf4j + + + log4j + log4j + + + servlet-api + javax.servlet + + + diff --git a/src/main/java/org/ohdsi/webapi/cohortcharacterization/DropCohortTableListener.java b/src/main/java/org/ohdsi/webapi/cohortcharacterization/DropCohortTableListener.java index 0c4d0280bf..2be0392656 100644 --- a/src/main/java/org/ohdsi/webapi/cohortcharacterization/DropCohortTableListener.java +++ b/src/main/java/org/ohdsi/webapi/cohortcharacterization/DropCohortTableListener.java @@ -48,7 +48,7 @@ private Object doTask(JobParameters parameters) { final String resultsQualifier = SourceUtils.getResultsQualifier(source); final String tempQualifier = SourceUtils.getTempQualifier(source, resultsQualifier); String toRemove = SqlTranslate.translateSql(sql, source.getSourceDialect(), null, tempQualifier); - if (Objects.equals(DBMSType.HIVE.getOhdsiDB(), source.getSourceDialect())){ + if (Objects.equals(DBMSType.HIVE.getOhdsiDB(), source.getSourceDialect())){ toRemove = StringUtils.remove(toRemove, ';'); } jdbcTemplate.execute(toRemove); diff --git a/src/main/java/org/ohdsi/webapi/executionengine/service/AnalysisResultFileContentSensitiveInfoServiceImpl.java b/src/main/java/org/ohdsi/webapi/executionengine/service/AnalysisResultFileContentSensitiveInfoServiceImpl.java index 97a0d32894..464661a313 100644 --- a/src/main/java/org/ohdsi/webapi/executionengine/service/AnalysisResultFileContentSensitiveInfoServiceImpl.java +++ b/src/main/java/org/ohdsi/webapi/executionengine/service/AnalysisResultFileContentSensitiveInfoServiceImpl.java @@ -1,7 +1,22 @@ package org.ohdsi.webapi.executionengine.service; +import static 
com.google.common.io.Files.createTempDir; + import com.odysseusinc.arachne.execution_engine_common.util.CommonFileUtils; -import net.lingala.zip4j.core.ZipFile; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.annotation.PostConstruct; +import net.lingala.zip4j.ZipFile; import net.lingala.zip4j.exception.ZipException; import org.apache.commons.collections.map.HashedMap; import org.apache.commons.io.FileUtils; @@ -13,18 +28,6 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; -import javax.annotation.PostConstruct; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import static com.google.common.io.Files.createTempDir; - @Service public class AnalysisResultFileContentSensitiveInfoServiceImpl extends AbstractSensitiveInfoService implements AnalysisResultFileContentSensitiveInfoService { private final String EXTENSION_ALL = "*"; @@ -156,9 +159,8 @@ private void processArchive(Path zipPath, Map variables) { // Delete archive volumes ZipFile zipFile = new ZipFile(zipPath.toFile()); - List filenames = zipFile.getSplitZipFiles(); - filenames.forEach(filename -> { - File file = new File(filename); + List filenames = zipFile.getSplitZipFiles(); + filenames.forEach(file -> { file.delete(); }); @@ -170,10 +172,11 @@ private void processArchive(Path zipPath, Map variables) { } }); CommonFileUtils.compressAndSplit(temporaryDir, zipPath.toFile(), null); - } catch (IOException e) { - LOGGER.error("File writing error", e); + } catch 
(ZipException e) { LOGGER.error("Error unzipping file", e); + } catch (IOException e) { + LOGGER.error("File writing error", e); } finally { FileUtils.deleteQuietly(temporaryDir); } diff --git a/src/main/java/org/ohdsi/webapi/executionengine/service/ScriptExecutionServiceImpl.java b/src/main/java/org/ohdsi/webapi/executionengine/service/ScriptExecutionServiceImpl.java index 2603ea57bd..9783ca6e5f 100644 --- a/src/main/java/org/ohdsi/webapi/executionengine/service/ScriptExecutionServiceImpl.java +++ b/src/main/java/org/ohdsi/webapi/executionengine/service/ScriptExecutionServiceImpl.java @@ -145,7 +145,7 @@ public void runScript(Long executionId, Source source, List files, } finally { FileUtils.deleteQuietly(tempDir); } - }catch (ZipException | IOException e) { + }catch (IOException e) { log.error("Failed to compress request files", e); throw new InternalServerErrorException(e); } From 779ab7bcbe8f3b893df3fdb7d3700f6fcde68dfc Mon Sep 17 00:00:00 2001 From: ymolodkov Date: Tue, 17 Sep 2019 14:20:23 +0200 Subject: [PATCH 09/15] issue-1168 generate hive specific ddl result schema --- .../org/ohdsi/webapi/service/DDLService.java | 78 ++++++++++++------- .../BatchStatementExecutorWithProgress.java | 6 ++ .../ddl/results/hive/heracles_results.sql | 14 ++++ .../results/hive/heracles_results_dist.sql | 23 ++++++ 4 files changed, 92 insertions(+), 29 deletions(-) create mode 100644 src/main/resources/ddl/results/hive/heracles_results.sql create mode 100644 src/main/resources/ddl/results/hive/heracles_results_dist.sql diff --git a/src/main/java/org/ohdsi/webapi/service/DDLService.java b/src/main/java/org/ohdsi/webapi/service/DDLService.java index 8c203a8164..bf6c61eef2 100644 --- a/src/main/java/org/ohdsi/webapi/service/DDLService.java +++ b/src/main/java/org/ohdsi/webapi/service/DDLService.java @@ -21,6 +21,7 @@ import static org.ohdsi.webapi.service.SqlRenderService.translateSQL; import java.util.*; +import java.util.stream.Collectors; import javax.ws.rs.DefaultValue; import 
javax.ws.rs.GET; import javax.ws.rs.Path; @@ -28,6 +29,7 @@ import javax.ws.rs.QueryParam; import com.odysseusinc.arachne.commons.types.DBMSType; +import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.ObjectUtils; import org.ohdsi.circe.helper.ResourceHelper; import org.ohdsi.webapi.sqlrender.SourceStatement; @@ -43,34 +45,36 @@ public class DDLService { public static final String CEM_SCHEMA = "cem_results_schema"; public static final String TEMP_SCHEMA = "oracle_temp_schema"; - private static final Collection RESULT_DDL_FILE_PATHS = Arrays.asList( - "/ddl/results/cohort.sql", - "/ddl/results/cohort_features.sql", - "/ddl/results/cohort_features_analysis_ref.sql", - "/ddl/results/cohort_features_dist.sql", - "/ddl/results/cohort_features_ref.sql", - "/ddl/results/cohort_inclusion.sql", - "/ddl/results/cohort_inclusion_result.sql", - "/ddl/results/cohort_inclusion_stats.sql", - "/ddl/results/cohort_summary_stats.sql", - "/ddl/results/cohort_censor_stats.sql", - "/ddl/results/feas_study_inclusion_stats.sql", - "/ddl/results/feas_study_index_stats.sql", - "/ddl/results/feas_study_result.sql", - "/ddl/results/heracles_analysis.sql", - "/ddl/results/heracles_heel_results.sql", - "/ddl/results/heracles_results.sql", - "/ddl/results/heracles_results_dist.sql", - "/ddl/results/ir_analysis_dist.sql", - "/ddl/results/ir_analysis_result.sql", - "/ddl/results/ir_analysis_strata_stats.sql", - "/ddl/results/ir_strata.sql", - "/ddl/results/heracles_periods.sql", - "/ddl/results/cohort_characterizations.sql", - "/ddl/results/pathway_analysis_codes.sql", - "/ddl/results/pathway_analysis_events.sql", - "/ddl/results/pathway_analysis_paths.sql", - "/ddl/results/pathway_analysis_stats.sql" + + private static final String RESULT_DDL_ROOT = "/ddl/results"; + private static final Collection RESULT_DDL_FILE_NAMES = Arrays.asList( + "cohort.sql", + "cohort_features.sql", + "cohort_features_analysis_ref.sql", + "cohort_features_dist.sql", + "cohort_features_ref.sql", + 
"cohort_inclusion.sql", + "cohort_inclusion_result.sql", + "cohort_inclusion_stats.sql", + "cohort_summary_stats.sql", + "cohort_censor_stats.sql", + "feas_study_inclusion_stats.sql", + "feas_study_index_stats.sql", + "feas_study_result.sql", + "heracles_analysis.sql", + "heracles_heel_results.sql", + "heracles_results.sql", + "heracles_results_dist.sql", + "ir_analysis_dist.sql", + "ir_analysis_result.sql", + "ir_analysis_strata_stats.sql", + "ir_strata.sql", + "heracles_periods.sql", + "cohort_characterizations.sql", + "pathway_analysis_codes.sql", + "pathway_analysis_events.sql", + "pathway_analysis_paths.sql", + "pathway_analysis_stats.sql" ); private static final String INIT_HERACLES_PERIODS = "/ddl/results/init_heracles_periods.sql"; @@ -113,7 +117,7 @@ public String generateResultSQL( @DefaultValue("true") @QueryParam("initConceptHierarchy") Boolean initConceptHierarchy, @QueryParam("tempSchema") String tempSchema) { - Collection resultDDLFilePaths = new ArrayList<>(RESULT_DDL_FILE_PATHS); + Collection resultDDLFilePaths = getResultDDLFilePaths(dialect); if (initConceptHierarchy) { resultDDLFilePaths.addAll(INIT_CONCEPT_HIERARCHY_FILE_PATHS); @@ -170,6 +174,22 @@ private String generateSQL(String dialect, Map params, Collectio return result.replaceAll(";", ";\n"); } + private List getResultDDLFilePaths(@QueryParam("dialect") String dialect) { + + return new ArrayList<>(RESULT_DDL_FILE_NAMES).stream() + .map(fileName -> { + String dialectSpecificFilePath = String.format("%s/%s/%s", RESULT_DDL_ROOT, StringUtils.lowerCase(dialect), fileName); + if (isResourceFileExists(dialectSpecificFilePath)) { + return dialectSpecificFilePath; + } + return String.format("%s/%s", RESULT_DDL_ROOT, fileName); + }) + .collect(Collectors.toList()); + } + + private boolean isResourceFileExists(String fileName) { + return ResourceHelper.class.getResource(fileName) != null; + } private String translateSqlFile(String sql, String dialect, Map params) { SourceStatement statement = new 
SourceStatement(); diff --git a/src/main/java/org/ohdsi/webapi/util/BatchStatementExecutorWithProgress.java b/src/main/java/org/ohdsi/webapi/util/BatchStatementExecutorWithProgress.java index 3794ea28d3..95d1add972 100644 --- a/src/main/java/org/ohdsi/webapi/util/BatchStatementExecutorWithProgress.java +++ b/src/main/java/org/ohdsi/webapi/util/BatchStatementExecutorWithProgress.java @@ -1,6 +1,9 @@ package org.ohdsi.webapi.util; import org.apache.tika.concurrent.SimpleThreadPoolExecutor; +import org.ohdsi.webapi.user.importer.UserImportController; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.dao.DataAccessException; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.jdbc.core.StatementCallback; @@ -15,6 +18,8 @@ public class BatchStatementExecutorWithProgress { + private static final Logger logger = LoggerFactory.getLogger(BatchStatementExecutorWithProgress.class); + private String[] statements; private final TransactionTemplate transactionTemplate; @@ -41,6 +46,7 @@ public int[] execute(Consumer consumer){ try { for (int i = 0; i < totals; i++) { String stmt = statements[i]; + logger.debug("Btch query: {}", stmt); updateCount[i] = jdbcTemplate.execute((StatementCallback) st -> !st.execute(stmt) ? 
st.getUpdateCount() : 0); if (i % PROGRESS_UPDATE_SIZE == 0 || i == (totals - 1)) { int progress = (int) Math.round(100.0 * i / totals); diff --git a/src/main/resources/ddl/results/hive/heracles_results.sql b/src/main/resources/ddl/results/hive/heracles_results.sql new file mode 100644 index 0000000000..5606950309 --- /dev/null +++ b/src/main/resources/ddl/results/hive/heracles_results.sql @@ -0,0 +1,14 @@ +IF OBJECT_ID('@results_schema.heracles_results', 'U') IS NULL +create table @results_schema.heracles_results +( + analysis_id int, + stratum_1 varchar(255), + stratum_2 varchar(255), + stratum_3 varchar(255), + stratum_4 varchar(255), + stratum_5 varchar(255), + count_value bigint, + last_update_time timestamp +) +PARTITIONED BY(cohort_definition_id int) +clustered by (analysis_id) into 64 buckets; diff --git a/src/main/resources/ddl/results/hive/heracles_results_dist.sql b/src/main/resources/ddl/results/hive/heracles_results_dist.sql new file mode 100644 index 0000000000..2e641471e0 --- /dev/null +++ b/src/main/resources/ddl/results/hive/heracles_results_dist.sql @@ -0,0 +1,23 @@ +IF OBJECT_ID('@results_schema.heracles_results_dist', 'U') IS NULL +create table @results_schema.heracles_results_dist +( + analysis_id int, + stratum_1 varchar(255), + stratum_2 varchar(255), + stratum_3 varchar(255), + stratum_4 varchar(255), + stratum_5 varchar(255), + count_value bigint, + min_value float, + max_value float, + avg_value float, + stdev_value float, + median_value float, + p10_value float, + p25_value float, + p75_value float, + p90_value float, + last_update_time timestamp + ) +PARTITIONED BY(cohort_definition_id int) +clustered by (analysis_id) into 64 buckets; From 5cdf3fe241d6f65d8ce622b5b130664e4fc1ff18 Mon Sep 17 00:00:00 2001 From: ymolodkov Date: Tue, 17 Sep 2019 14:33:08 +0200 Subject: [PATCH 10/15] issue-1168 generate hive specific ddl result schema - add check for empty dialect variable.
--- .../java/org/ohdsi/webapi/service/DDLService.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/ohdsi/webapi/service/DDLService.java b/src/main/java/org/ohdsi/webapi/service/DDLService.java index bf6c61eef2..5cd88647b7 100644 --- a/src/main/java/org/ohdsi/webapi/service/DDLService.java +++ b/src/main/java/org/ohdsi/webapi/service/DDLService.java @@ -174,15 +174,20 @@ private String generateSQL(String dialect, Map params, Collectio return result.replaceAll(";", ";\n"); } - private List getResultDDLFilePaths(@QueryParam("dialect") String dialect) { - + private List getResultDDLFilePaths(String dialect) { return new ArrayList<>(RESULT_DDL_FILE_NAMES).stream() .map(fileName -> { + String filePath = String.format("%s/%s", RESULT_DDL_ROOT, fileName); + if (StringUtils.isEmpty(dialect)) { + return filePath; + } + String dialectSpecificFilePath = String.format("%s/%s/%s", RESULT_DDL_ROOT, StringUtils.lowerCase(dialect), fileName); if (isResourceFileExists(dialectSpecificFilePath)) { return dialectSpecificFilePath; } - return String.format("%s/%s", RESULT_DDL_ROOT, fileName); + + return filePath; }) .collect(Collectors.toList()); } From 6f139d0cd910840f739808ca7fdb494fe1a350fc Mon Sep 17 00:00:00 2001 From: ymolodkov Date: Tue, 17 Sep 2019 17:51:54 +0200 Subject: [PATCH 11/15] Revert "issue-1168 generate hive specific ddl result schema" --- .../org/ohdsi/webapi/service/DDLService.java | 83 +++++++------------ .../BatchStatementExecutorWithProgress.java | 6 -- .../ddl/results/hive/heracles_results.sql | 14 ---- .../results/hive/heracles_results_dist.sql | 23 ----- 4 files changed, 29 insertions(+), 97 deletions(-) delete mode 100644 src/main/resources/ddl/results/hive/heracles_results.sql delete mode 100644 src/main/resources/ddl/results/hive/heracles_results_dist.sql diff --git a/src/main/java/org/ohdsi/webapi/service/DDLService.java b/src/main/java/org/ohdsi/webapi/service/DDLService.java index 5cd88647b7..8c203a8164 
100644 --- a/src/main/java/org/ohdsi/webapi/service/DDLService.java +++ b/src/main/java/org/ohdsi/webapi/service/DDLService.java @@ -21,7 +21,6 @@ import static org.ohdsi.webapi.service.SqlRenderService.translateSQL; import java.util.*; -import java.util.stream.Collectors; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.Path; @@ -29,7 +28,6 @@ import javax.ws.rs.QueryParam; import com.odysseusinc.arachne.commons.types.DBMSType; -import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.ObjectUtils; import org.ohdsi.circe.helper.ResourceHelper; import org.ohdsi.webapi.sqlrender.SourceStatement; @@ -45,36 +43,34 @@ public class DDLService { public static final String CEM_SCHEMA = "cem_results_schema"; public static final String TEMP_SCHEMA = "oracle_temp_schema"; - - private static final String RESULT_DDL_ROOT = "/ddl/results"; - private static final Collection RESULT_DDL_FILE_NAMES = Arrays.asList( - "cohort.sql", - "cohort_features.sql", - "cohort_features_analysis_ref.sql", - "cohort_features_dist.sql", - "cohort_features_ref.sql", - "cohort_inclusion.sql", - "cohort_inclusion_result.sql", - "cohort_inclusion_stats.sql", - "cohort_summary_stats.sql", - "cohort_censor_stats.sql", - "feas_study_inclusion_stats.sql", - "feas_study_index_stats.sql", - "feas_study_result.sql", - "heracles_analysis.sql", - "heracles_heel_results.sql", - "heracles_results.sql", - "heracles_results_dist.sql", - "ir_analysis_dist.sql", - "ir_analysis_result.sql", - "ir_analysis_strata_stats.sql", - "ir_strata.sql", - "heracles_periods.sql", - "cohort_characterizations.sql", - "pathway_analysis_codes.sql", - "pathway_analysis_events.sql", - "pathway_analysis_paths.sql", - "pathway_analysis_stats.sql" + private static final Collection RESULT_DDL_FILE_PATHS = Arrays.asList( + "/ddl/results/cohort.sql", + "/ddl/results/cohort_features.sql", + "/ddl/results/cohort_features_analysis_ref.sql", + "/ddl/results/cohort_features_dist.sql", + 
"/ddl/results/cohort_features_ref.sql", + "/ddl/results/cohort_inclusion.sql", + "/ddl/results/cohort_inclusion_result.sql", + "/ddl/results/cohort_inclusion_stats.sql", + "/ddl/results/cohort_summary_stats.sql", + "/ddl/results/cohort_censor_stats.sql", + "/ddl/results/feas_study_inclusion_stats.sql", + "/ddl/results/feas_study_index_stats.sql", + "/ddl/results/feas_study_result.sql", + "/ddl/results/heracles_analysis.sql", + "/ddl/results/heracles_heel_results.sql", + "/ddl/results/heracles_results.sql", + "/ddl/results/heracles_results_dist.sql", + "/ddl/results/ir_analysis_dist.sql", + "/ddl/results/ir_analysis_result.sql", + "/ddl/results/ir_analysis_strata_stats.sql", + "/ddl/results/ir_strata.sql", + "/ddl/results/heracles_periods.sql", + "/ddl/results/cohort_characterizations.sql", + "/ddl/results/pathway_analysis_codes.sql", + "/ddl/results/pathway_analysis_events.sql", + "/ddl/results/pathway_analysis_paths.sql", + "/ddl/results/pathway_analysis_stats.sql" ); private static final String INIT_HERACLES_PERIODS = "/ddl/results/init_heracles_periods.sql"; @@ -117,7 +113,7 @@ public String generateResultSQL( @DefaultValue("true") @QueryParam("initConceptHierarchy") Boolean initConceptHierarchy, @QueryParam("tempSchema") String tempSchema) { - Collection resultDDLFilePaths = getResultDDLFilePaths(dialect); + Collection resultDDLFilePaths = new ArrayList<>(RESULT_DDL_FILE_PATHS); if (initConceptHierarchy) { resultDDLFilePaths.addAll(INIT_CONCEPT_HIERARCHY_FILE_PATHS); @@ -174,27 +170,6 @@ private String generateSQL(String dialect, Map params, Collectio return result.replaceAll(";", ";\n"); } - private List getResultDDLFilePaths(String dialect) { - return new ArrayList<>(RESULT_DDL_FILE_NAMES).stream() - .map(fileName -> { - String filePath = String.format("%s/%s", RESULT_DDL_ROOT, fileName); - if (StringUtils.isEmpty(dialect)) { - return filePath; - } - - String dialectSpecificFilePath = String.format("%s/%s/%s", RESULT_DDL_ROOT, StringUtils.lowerCase(dialect), 
fileName); - if (isResourceFileExists(dialectSpecificFilePath)) { - return dialectSpecificFilePath; - } - - return filePath; - }) - .collect(Collectors.toList()); - } - - private boolean isResourceFileExists(String fileName) { - return ResourceHelper.class.getResource(fileName) != null; - } private String translateSqlFile(String sql, String dialect, Map params) { SourceStatement statement = new SourceStatement(); diff --git a/src/main/java/org/ohdsi/webapi/util/BatchStatementExecutorWithProgress.java b/src/main/java/org/ohdsi/webapi/util/BatchStatementExecutorWithProgress.java index 95d1add972..3794ea28d3 100644 --- a/src/main/java/org/ohdsi/webapi/util/BatchStatementExecutorWithProgress.java +++ b/src/main/java/org/ohdsi/webapi/util/BatchStatementExecutorWithProgress.java @@ -1,9 +1,6 @@ package org.ohdsi.webapi.util; import org.apache.tika.concurrent.SimpleThreadPoolExecutor; -import org.ohdsi.webapi.user.importer.UserImportController; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.dao.DataAccessException; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.jdbc.core.StatementCallback; @@ -18,8 +15,6 @@ public class BatchStatementExecutorWithProgress { - private static final Logger logger = LoggerFactory.getLogger(BatchStatementExecutorWithProgress.class); - private String[] statements; private final TransactionTemplate transactionTemplate; @@ -46,7 +41,6 @@ public int[] execute(Consumer consumer){ try { for (int i = 0; i < totals; i++) { String stmt = statements[i]; - logger.debug("Btch query: {}", stmt); updateCount[i] = jdbcTemplate.execute((StatementCallback) st -> !st.execute(stmt) ? 
st.getUpdateCount() : 0); if (i % PROGRESS_UPDATE_SIZE == 0 || i == (totals - 1)) { int progress = (int) Math.round(100.0 * i / totals); diff --git a/src/main/resources/ddl/results/hive/heracles_results.sql b/src/main/resources/ddl/results/hive/heracles_results.sql deleted file mode 100644 index 5606950309..0000000000 --- a/src/main/resources/ddl/results/hive/heracles_results.sql +++ /dev/null @@ -1,14 +0,0 @@ -IF OBJECT_ID('@results_schema.heracles_results', 'U') IS NULL -create table @results_schema.heracles_results -( - analysis_id int, - stratum_1 varchar(255), - stratum_2 varchar(255), - stratum_3 varchar(255), - stratum_4 varchar(255), - stratum_5 varchar(255), - count_value bigint, - last_update_time timestamp -) -PARTITIONED BY(cohort_definition_id int) -clustered by (analysis_id) into 64 buckets; diff --git a/src/main/resources/ddl/results/hive/heracles_results_dist.sql b/src/main/resources/ddl/results/hive/heracles_results_dist.sql deleted file mode 100644 index 2e641471e0..0000000000 --- a/src/main/resources/ddl/results/hive/heracles_results_dist.sql +++ /dev/null @@ -1,23 +0,0 @@ -IF OBJECT_ID('@results_schema.heracles_results_dist', 'U') IS NULL -create table @results_schema.heracles_results_dist -( - analysis_id int, - stratum_1 varchar(255), - stratum_2 varchar(255), - stratum_3 varchar(255), - stratum_4 varchar(255), - stratum_5 varchar(255), - count_value bigint, - min_value float, - max_value float, - avg_value float, - stdev_value float, - median_value float, - p10_value float, - p25_value float, - p75_value float, - p90_value float, - last_update_time timestamp - ) -PARTITIONED BY(cohort_definition_id int) -clustered by (analysis_id) into 64 buckets; From 595e7cf05381c662b84d9544d02002074925ff94 Mon Sep 17 00:00:00 2001 From: ymolodkov Date: Tue, 17 Sep 2019 17:53:51 +0200 Subject: [PATCH 12/15] issue-1168 use HINTS to generate partition and bucket --- src/main/resources/ddl/results/heracles_results.sql | 2 ++ 
src/main/resources/ddl/results/heracles_results_dist.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/main/resources/ddl/results/heracles_results.sql b/src/main/resources/ddl/results/heracles_results.sql index 3fd8431ce2..8ae9e40eef 100644 --- a/src/main/resources/ddl/results/heracles_results.sql +++ b/src/main/resources/ddl/results/heracles_results.sql @@ -1,3 +1,5 @@ +--HINT PARTITION(cohort_definition_id) +--HINT BUCKET(analysis_id, 64) IF OBJECT_ID('@results_schema.heracles_results', 'U') IS NULL create table @results_schema.heracles_results ( diff --git a/src/main/resources/ddl/results/heracles_results_dist.sql b/src/main/resources/ddl/results/heracles_results_dist.sql index d6e6307e2f..8c84300d19 100644 --- a/src/main/resources/ddl/results/heracles_results_dist.sql +++ b/src/main/resources/ddl/results/heracles_results_dist.sql @@ -1,3 +1,5 @@ +--HINT PARTITION(cohort_definition_id) +--HINT BUCKET(analysis_id, 64) IF OBJECT_ID('@results_schema.heracles_results_dist', 'U') IS NULL create table @results_schema.heracles_results_dist ( From 04cdf444e7a6409da89e991ab3e424cec6275270 Mon Sep 17 00:00:00 2001 From: YaroslavTir Date: Mon, 30 Sep 2019 13:44:33 +0200 Subject: [PATCH 13/15] issue-1168 use HINTS to generate partition and bucket --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5e27a8d110..7ea987460a 100644 --- a/pom.xml +++ b/pom.xml @@ -633,7 +633,7 @@ org.ohdsi circe - 1.8.2-SNAPSHOT + 1.8.1-SNAPSHOT org.ohdsi From a95ce2740e257fe9f328ae8d7d72b362877d2da9 Mon Sep 17 00:00:00 2001 From: Vitaly Koulakov Date: Fri, 1 Nov 2019 17:28:39 +0300 Subject: [PATCH 14/15] fix dependency conflict --- pom.xml | 191 +++++++++++++++++++------------------------------------- 1 file changed, 63 insertions(+), 128 deletions(-) diff --git a/pom.xml b/pom.xml index 7ea987460a..bda99277e9 100644 --- a/pom.xml +++ b/pom.xml @@ -186,7 +186,7 @@ classpath:config/ehcache.xml 1.6.3-SNAPSHOT - 3.1.0 + 3.1.2 WebAPI @@ 
-902,50 +902,15 @@ webapi-impala true - - ${basedir}/src/main/extras/impala + 2.6.15 + + ...path/to/impala/jdbc/drivers... - - hive - hive_metastore - 1.0.0 - - - hive - hive_service - 1.0.0 - com.cloudera - ImpalaJDBC41 - 2.5.43 - - - libfb - libfb303 - 0.9.0 - - - libthrift - libthrift - 0.9.0 - - - ql - ql - 1.0.0 - - - TCLI - TCLI - 1.0.0 - - - org.apache.zookeeper - zookeeper - 3.4.6 - pom + ImpalaJDBC42 + ${impala.driver.version} @@ -963,94 +928,10 @@ com.cloudera - ImpalaJDBC41 - 2.5.43 - jar - ${impala.classpath}/ImpalaJDBC41.jar - - - - hive_metastore - initialize - - install-file - - - hive - hive_metastore - 1.0.0 - jar - ${impala.classpath}/hive_metastore.jar - - - - hive_service - initialize - - install-file - - - hive - hive_service - 1.0.0 - jar - ${impala.classpath}/hive_service.jar - - - - libfb - initialize - - install-file - - - libfb - libfb303 - 0.9.0 - jar - ${impala.classpath}/libfb303-0.9.0.jar - - - - libthrift - initialize - - install-file - - - libthrift - libthrift - 0.9.0 - jar - ${impala.classpath}/libthrift-0.9.0.jar - - - - ql - initialize - - install-file - - - ql - ql - 1.0.0 + ImpalaJDBC42 + ${impala.driver.version} jar - ${impala.classpath}/ql.jar - - - - TCLI - initialize - - install-file - - - TCLI - TCLI - 1.0.0 - jar - ${impala.classpath}/TCLIServiceClient.jar + ${impala.classpath}/ImpalaJDBC42.jar @@ -1221,8 +1102,62 @@ servlet-api javax.servlet + + com.zaxxer + HikariCP + + + com.zaxxer + HikariCP-java7 + + + javax.ws.rs + javax.ws.rs-api + + + com.sun.jersey + jersey-server + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-client + + + com.sun.jersey + jersey-json + + + com.sun.jersey + jersey-server + 1.19.4 + + + com.sun.jersey + jersey-core + 1.19.4 + + + javax.ws.rs + jsr311-api + + + + + com.sun.jersey + jersey-client + 1.19.4 + + + com.sun.jersey + jersey-json + 1.19.4 + From 22e5fef961fdaf37932538d0ca7a919733692982 Mon Sep 17 00:00:00 2001 From: Vitaly Koulakov Date: Tue, 5 Nov 2019 11:52:24 +0300 
Subject: [PATCH 15/15] fix dependency conflict --- pom.xml | 4 ++++ .../java/org/ohdsi/webapi/source/SourceDaimon.java | 13 +++---------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index bda99277e9..f66c7666ee 100644 --- a/pom.xml +++ b/pom.xml @@ -1130,6 +1130,10 @@ com.sun.jersey jersey-json + + com.tdunning + json + diff --git a/src/main/java/org/ohdsi/webapi/source/SourceDaimon.java b/src/main/java/org/ohdsi/webapi/source/SourceDaimon.java index 16d7a7240a..4a608b3328 100644 --- a/src/main/java/org/ohdsi/webapi/source/SourceDaimon.java +++ b/src/main/java/org/ohdsi/webapi/source/SourceDaimon.java @@ -17,15 +17,8 @@ import com.fasterxml.jackson.annotation.JsonIgnore; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; -import javax.persistence.GeneratedValue; -import javax.persistence.Id; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Table; +import javax.persistence.*; + import org.hibernate.annotations.GenericGenerator; import org.hibernate.annotations.Parameter; import org.hibernate.annotations.SQLDelete; @@ -65,7 +58,7 @@ public SourceDaimon(Source source) { @Column(name="SOURCE_DAIMON_ID") private int sourceDaimonId; - @ManyToOne + @ManyToOne(fetch = FetchType.LAZY) @JsonIgnore @JoinColumn(name="SOURCE_ID", referencedColumnName="SOURCE_ID") private Source source;