From c99a0be29ccf8689ef0e7965d9d15ffaf1b1e786 Mon Sep 17 00:00:00 2001 From: molodkov yaroslav Date: Tue, 11 Feb 2020 14:14:23 +0100 Subject: [PATCH] QA 1.16.0 (#273) * ATLP-446 added kerberos in image added validation check for krb user * New version: 1.14.0 * ATLP-446 fixed user checking * ATLP-446 add ability to append realms to krb5.conf * ATLP-446. Fixed notes * ATLP-446. Moved kinit to jail * enable kerberos for chroot'ed env * enable kerberos for chroot'ed env * ATLP-446. Fixed notes * ARACHNE-2359. Back-merge of multi-statement SQL files execution fix * ATLP-446. Made kinit in chroot mode * ATLP-446. Changed krb auth enum class * ATLP-446. added spaces to template * ATLP-446. removed extra lines * ATLP-446. turned off cleanup environment for debug * ATLP-446. reverted debug notes back * ATLP-446. set krb admin server properly * ATLP-446. Fixed notes * ATLP-446. Removed unnecessary dependency * ATLP-446. Added condition for kerberos init, fixed notes * Refactoring * ATLP-446. made some refactoring * ATLP-446. made some refactoring * ATLP-446. Added [domain-realm] section for correct login to impala (#145) * ATLP-446. Fixed cp command in jail.sh (#147) * ATLP-446. Fixed execution of kinit on host (#150) * ATLP-446. Extended krb5.conf both for host and for jail areas (#151) * ATLP-524. Replaced kerberos login logic with module from ArachneCommons (#159) * Added login to kerberos by password (#160) * ARACHNE-2555. Added connection pooling for DN health check (#154) (cherry picked from commit 2bceaad) * ARACHNE-2556 EE: set default timezone in docker container to UTC (#155) (cherry picked from commit 4de1a00) * Fixed dockerImagePrefix * Switched container timezone from UTC to GMT (#158) (cherry picked from commit e4187dd) * SqlRender version (cherry picked from commit 10c9884) * Reverted dockerImagePrefix * ARACHNE-2575. 
Changed key used in DataSource-s cache to get a new connection when ds username/password changed (#169) * Changed ojdbc6 and impala driver versions (#170) * ARACHNE-2627. Added logging and moved metadata extraction into async mode [back_merge] (#172) * Fix R libraries versions (#178) * Fix R libraries versions for Atlas 2.7 (#179) * Adds Support of BQ keyfile json for authentication of EE jobs (#180) * Updates DatabaseConnector version (#184) * Upgrades R packages versions (#186) * Adds Netezza support (#185) * Adds cron scheduled job to refresh Kerberos ticket (#187) * Added parameter to define whether execution results should be deleted after callback (#190) * Prevents R packages updates (#191) * Exposes data source name as Environment variable (#194) * Excluded extra folders from analyses results (#196) * added default exclusion for analysis * Added R packages for new CC analysis. Adds ANALYSIS_ID env param (#195) * added DATA_SOURCE_NAME to params (#201) * Added synchronous mode of analysis execution (#202) * Added synchronous mode of analysis execution * Cleanup * Cleanup * Forgotten diff for sync execution mode (#204) * Added synchronous mode of analysis execution * Cleanup * Cleanup * Forgotten diff for sync mode * Updated version of r-env. Removed unused docker files (#206) (#208) (#209) * Updated version of r-env. 
Removed unused docker files (#206) * rearranged libraries (#206) (cherry picked from commit 1783b94ec22d8ce5138f548c1395c576c709c79e) * Back merge from 1.14 & version increment (#205) * Build JAR that can be used as a dependency in other projects additionally to executable one (#211) * Use R libs file from DockerEnv repo to avoid duplication (#210) * Fixes build process conforming PLP deps (#216) * Fixes docker build after classifier has been set (#217) * Attempt to fix docker build * Fixes docker build * run apt-get update in non-interactive mode * Remove redundant dependencies (#218) * Adds geospatial libs (#219) * Fixes user home dir (#222) * improve swagger documentation (#220) * Re-uses DataSourceAuthResolver-s from arachne-commons (#230) * Fixes NPE during non-Kerberos auth (#231) * Updates R-env image version (#232) * Develop backmerge from qa (#237) * change r-env version (#240) * Upgrade to zip4j 2.1.1 (#242) * Upgrade to zip4j 2.1.1 * Removed unused ZipException declarations * Fixes removal of Keyfile from result zip (#246) * Fixes R dist build. Fixes jail mode for BQ (#248) * Back merge release 1.15.0 (#266) * ARACHNE-2359. Back-merge of multi-statement SQL files execution fix (#138) * ARACHNE-2359. Merge fix (#140) * ATLP-446. Added keytab propagation for Impala (#137) * ATLP-446. Added [domain-realm] section for correct login to impala (#144) * ATLP-446. Fixed cp command in jail.sh (#146) * ATLP-446. Fixed kinit command for user-password mode (#148) * ATLP-446. Fixed execution of kinit on host (#149) * ATLP-446. Extended krb5.conf both for host and for jail areas (#152) * ARACHNE-2556 EE: set default timezone in docker container to UTC (#155) * ARACHNE-2555. Added connection pooling for DN health check (#154) (cherry picked from commit 2bceaad) * Fixed * Fixed * Fixed * Switched container timezone from UTC to GMT (#158) (cherry picked from commit e4187dd) * SqlRender version * Reverted dockerImagePrefix (#165) * ARACHNE-2575. 
Changed key used in DataSource-s cache to get a new connection when ds username/password changed [back-merge] (#167) * ojdbc version fix * ARACHNE-2627. Added logging and moved metadata extraction into async mode (#171) * fix DatabaseConnector & FeatureExtraction versions (#174) * ARACHNE-2648 Datanode health check does not work as expected * update version to 1.13.1-QA (#188) * 1.14.0-QA (#197) * Merge fix * Merge fix #2 * Removed redundant metadata extraction (merge issue) (#199) * Added DATA_SOURCE_NAME to chroot params (#201) (cherry picked from commit d0c4814) * Start R scripts in isolated PID namespace when chroot mode is enabled (#224) * Start R scripts in isolated PID namespace when chroot mode is enabled * Do not show env variables in ps * Attempt to fix script * fixes issue with non-root user * removes redundant param * Qa backmerge from master (#235) * ARACHNE-1717 1.10.0 * ARACHNE-1854 Updated version * ARACHNE-2005 Release 1.11.2 * ARACHNE-2106 Release 1.11.3 * ARACHNE-2357 Release 1.12.1 * ARACHNE-1668. Updated drivers to support Postgres 10 (cherry picked from commit 41fb866) * Changed version 1.12.2-qa -> 1.12.2 * ARACHNE-2521. Fixed return of execution status * ARACHNE-2539. Fixed execution of SQL containing multiple queries per file in Oracle * Release 1.12.3 * ARACHNE-2555. Added connection pooling for DN health check (#154) * ARACHNE-2556 EE: set default timezone in docker container to UTC (#157) * Switched container timezone from UTC to GMT (#158) * Release 1.14.0 (#203) * Updated version of r-env. Removed unused docker files (#206) (#208) * Updated version of r-env. 
Removed unused docker files (#206) * rearranged libraries (#206) * change version to qa * change version to qa (#236) * change impala jdbc driver version (#238) * change impala jdbc driver version * change impala jdbc driver version * fix readme * fixes download link * change r-env version (#241) * QA 1.15.0 (#244) * Fixes removal of Keyfile from result zip (#247) * Fixes R dist build. Fixes jail mode for BQ (#249) * Removes extra installs. Updates to reflect changes from R-env (#251) * Adds CDM v5.3.1 detection (#253) * ARACHNE-2722 set right version for sql-render (#261) * Updates python to v3.6 making PLP models run successfully (#257) * Fixes issue with ojdbc and timezone region (#262) * ARACHNE-2722 - Cohort (simple counts) fails on BigQuery (#264) - use SingleStatementSqlExecutor for BigQuery request * Optimize cdm detect (#267) * optimization for CDM version detection * Hello Spring boot 2! * Fix logger configuration - failing after spring boot 2 migration (#268) * Migrate deprecated properties to spring-boot-2 schema (#269) * Migrate deprecated properties to spring-boot-2 schema Fix: tomcat 9.0.26 packed into spring boot 2.1.9 has a bug, rolling back to 2.1.8 with stable tomcat 9.0.25 - failing after spring boot 2 migration * downgrade embedded tomcat to 9.0.24 * upgrade embedded tomcat to 9.0.27 * Adds support for Apache Hive LLAP (#272) Fixes: https://github.com/OHDSI/WebAPI/issues/1168 Adds support for Apache Hive LLAP Co-authored-by: anastasiia klochkova <38942654+aklochkova@users.noreply.github.com> Co-authored-by: Anton Gackovka Co-authored-by: Vitaly Koulakov Co-authored-by: Pavel Grafkin Co-authored-by: mpozhidaeva <31308645+mpozhidaeva@users.noreply.github.com> Co-authored-by: OdysseusBot <34305057+OdysseusBot@users.noreply.github.com> Co-authored-by: ssuvorov-fls Co-authored-by: AntonStepanof <48437569+AntonStepanof@users.noreply.github.com> Co-authored-by: Alex Cumarav <7705583+acumarav@users.noreply.github.com> --- pom.xml | 253 
+++++++++--------- src/main/dist/run_build.sh | 19 +- .../properties/HiveBulkLoadProperties.java | 136 ++++++++++ .../service/VersionDetectionService.java | 33 +++ .../VersionDetectionServiceFactory.java | 30 +++ .../service/impl/AnalysisServiceImpl.java | 5 +- .../impl/BaseVersionDetectionService.java | 141 ++++++++++ .../service/impl/CdmMetadataServiceImpl.java | 126 ++++----- .../impl/DefaultVersionDetectionService.java | 141 ++++++++++ .../impl/ImpalaVersionDetectionService.java | 88 ++++++ .../service/impl/RuntimeServiceImpl.java | 35 ++- .../VersionDetectionServiceFactoryImpl.java | 51 ++++ .../sql/SqlMetadataServiceFactory.java | 1 + .../executionengine/util/DateUtil.java | 8 + src/main/resources/application-base.yml | 39 ++- src/main/resources/application-dev.yml | 3 +- src/main/resources/cdm/v4/cdm_V4_0.json | 10 + src/main/resources/cdm/v4/tableCheck.sql | 16 -- src/main/resources/cdm/v5/cdm_commons.json | 37 +++ src/main/resources/cdm/v5/diff_V5_0.json | 14 + src/main/resources/cdm/v5/diff_V5_0_1.json | 12 + src/main/resources/cdm/v5/diff_V5_1.json | 16 ++ src/main/resources/cdm/v5/diff_V5_2.json | 17 ++ src/main/resources/cdm/v5/diff_V5_3.json | 19 ++ src/main/resources/cdm/v5/diff_V5_3_1.json | 19 ++ src/main/resources/cdm/v5/tableCheck_V5_0.sql | 79 ------ .../resources/cdm/v5/tableCheck_V5_0_1.sql | 74 ----- src/main/resources/cdm/v5/tableCheck_V5_1.sql | 74 ----- src/main/resources/cdm/v5/tableCheck_V5_2.sql | 76 ------ src/main/resources/cdm/v5/tableCheck_V5_3.sql | 80 ------ src/main/resources/cdm/v6/cdm_V6_0.json | 39 +++ 31 files changed, 1055 insertions(+), 636 deletions(-) create mode 100644 src/main/java/com/odysseusinc/arachne/executionengine/config/properties/HiveBulkLoadProperties.java create mode 100644 src/main/java/com/odysseusinc/arachne/executionengine/service/VersionDetectionService.java create mode 100644 src/main/java/com/odysseusinc/arachne/executionengine/service/VersionDetectionServiceFactory.java create mode 100644 
src/main/java/com/odysseusinc/arachne/executionengine/service/impl/BaseVersionDetectionService.java create mode 100644 src/main/java/com/odysseusinc/arachne/executionengine/service/impl/DefaultVersionDetectionService.java create mode 100644 src/main/java/com/odysseusinc/arachne/executionengine/service/impl/ImpalaVersionDetectionService.java create mode 100644 src/main/java/com/odysseusinc/arachne/executionengine/service/impl/VersionDetectionServiceFactoryImpl.java create mode 100644 src/main/resources/cdm/v4/cdm_V4_0.json delete mode 100644 src/main/resources/cdm/v4/tableCheck.sql create mode 100644 src/main/resources/cdm/v5/cdm_commons.json create mode 100644 src/main/resources/cdm/v5/diff_V5_0.json create mode 100644 src/main/resources/cdm/v5/diff_V5_0_1.json create mode 100644 src/main/resources/cdm/v5/diff_V5_1.json create mode 100644 src/main/resources/cdm/v5/diff_V5_2.json create mode 100644 src/main/resources/cdm/v5/diff_V5_3.json create mode 100644 src/main/resources/cdm/v5/diff_V5_3_1.json delete mode 100644 src/main/resources/cdm/v5/tableCheck_V5_0.sql delete mode 100644 src/main/resources/cdm/v5/tableCheck_V5_0_1.sql delete mode 100644 src/main/resources/cdm/v5/tableCheck_V5_1.sql delete mode 100644 src/main/resources/cdm/v5/tableCheck_V5_2.sql delete mode 100644 src/main/resources/cdm/v5/tableCheck_V5_3.sql create mode 100644 src/main/resources/cdm/v6/cdm_V6_0.json diff --git a/pom.xml b/pom.xml index 4efb84d1..5f7ead1a 100644 --- a/pom.xml +++ b/pom.xml @@ -6,13 +6,13 @@ execution_engine com.odysseusinc.arachne - 1.15.1-QA + 1.16.0-SNAPSHOT jar 1.8 UTF-8 - 1.5.4.RELEASE + 2.1.9.RELEASE com.odysseusinc.arachne.executionengine.ExecutionEngineStarter @@ -23,12 +23,13 @@ 42.2.1 1.3.1 5.1.30 + 9.0.27 org.springframework.boot spring-boot-starter-parent - 1.5.4.RELEASE + 2.1.9.RELEASE @@ -147,7 +148,7 @@ org.ohdsi.sql SqlRender - 1.6.3 + 1.6.3-SNAPSHOT @@ -167,6 +168,18 @@ 2.7.2 + + com.odysseusinc + data-source-manager + ${project.version} + + + + 
org.springframework.boot + spring-boot-configuration-processor + true + + @@ -205,49 +218,14 @@ impala /impala/ + 2.6.15 - - hive - hive_metastore - 1.0.0 - - - hive - hive_service - 1.0.0 - com.cloudera - ImpalaJDBC4 - 2.5.42 - - - libfb - libfb303 - 0.9.0 - - - libthrift - libthrift - 0.9.0 - - - ql - ql - 1.0.0 - - - TCLI - TCLI - 1.0.0 - - - org.apache.zookeeper - zookeeper - 3.4.6 - pom + ImpalaJDBC42 + ${impala.driver.version} org.slf4j @@ -272,94 +250,10 @@ com.cloudera - ImpalaJDBC4 - 2.5.42 - jar - ${impala.classpath}/ImpalaJDBC41.jar - - - - hive_metastore - initialize - - install-file - - - hive - hive_metastore - 1.0.0 + ImpalaJDBC42 + ${impala.driver.version} jar - ${impala.classpath}/hive_metastore.jar - - - - hive_service - initialize - - install-file - - - hive - hive_service - 1.0.0 - jar - ${impala.classpath}/hive_service.jar - - - - libfb - initialize - - install-file - - - libfb - libfb303 - 0.9.0 - jar - ${impala.classpath}/libfb303-0.9.0.jar - - - - libthrift - initialize - - install-file - - - libthrift - libthrift - 0.9.0 - jar - ${impala.classpath}/libthrift-0.9.0.jar - - - - ql - initialize - - install-file - - - ql - ql - 1.0.0 - jar - ${impala.classpath}/ql.jar - - - - TCLI - initialize - - install-file - - - TCLI - TCLI - 1.0.0 - jar - ${impala.classpath}/TCLIServiceClient.jar + ${impala.classpath}/ImpalaJDBC42.jar @@ -367,6 +261,99 @@ + + hive + + 3.1.2 + + + + org.apache.hive + hive-jdbc + ${hive-jdbc.version} + + + org.apache.tomcat + * + + + org.eclipse.jetty + * + + + slf4j-log4j12 + org.slf4j + + + log4j + log4j + + + servlet-api + javax.servlet + + + com.zaxxer + HikariCP + + + com.zaxxer + HikariCP-java7 + + + javax.ws.rs + javax.ws.rs-api + + + com.sun.jersey + jersey-server + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-client + + + com.sun.jersey + jersey-json + + + com.tdunning + json + + + + + com.sun.jersey + jersey-server + 1.19.4 + + + com.sun.jersey + jersey-core + 1.19.4 + + + javax.ws.rs + jsr311-api 
+ + + + + com.sun.jersey + jersey-client + 1.19.4 + + + com.sun.jersey + jersey-json + 1.19.4 + + + bigquery @@ -562,7 +549,7 @@ org.springframework.boot spring-boot-maven-plugin - 1.5.0.RELEASE + ${spring.boot.version} @@ -570,6 +557,12 @@ exec + + + com.cloudera + ImpalaJDBC42 + + diff --git a/src/main/dist/run_build.sh b/src/main/dist/run_build.sh index 3aaf7347..6749c6b1 100755 --- a/src/main/dist/run_build.sh +++ b/src/main/dist/run_build.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash DIST=trusty -CRAN_DIST= ARCH=amd64 BUILD_PATH=./dist WS=`dirname $0` @@ -9,23 +8,24 @@ WS=`dirname $0` BQ_PATH=../extras/bigquery/ IMPALA_PATH=../extras/impala/ NETEZZA_PATH=../extras/netezza/ +HIVE_PATH=../extras/hive/ function print_help { echo "Usage: run_build.sh [OPTIONS]" echo "Available options are:" echo -e " -a i386|amd64 \tDistribution architecture, default is amd64" echo -e " -d DIST_NAME \t\tUbuntu distribution name, e.g. trusty or xenial, default is trusty" - echo -e " -r R_DIST_NAME \t\tUbuntu distribution name from cran with R packages, default is the same as used for DIST_NAME" echo -e " -b BUILDDIR \t\tDirectory where distribution build would be running" echo -e " -f FILE \t\tOutput archive filename" echo -e " -g PATH \t\tPath to BigQuery drivers" echo -e " -i PATH \t\tPath to Impala drivers" echo -e " -n PATH \t\tPath to Netezza drivers" + echo -e " -v PATH \t\tPath to Hive drivers" echo -e " -h \t\t\tPrints this" } OPTIND=1 -while getopts ":a:d:r:b:f:h:g:i:n" opt; do +while getopts ":a:d:b:f:h:g:i:n" opt; do case $opt in a) ARCH=$OPTARG @@ -33,9 +33,6 @@ while getopts ":a:d:r:b:f:h:g:i:n" opt; do d) DIST=$OPTARG ;; - r) - CRAN_DIST=$OPTARG - ;; b) BUILD_PATH=$OPTARG ;; @@ -66,10 +63,6 @@ while getopts ":a:d:r:b:f:h:g:i:n" opt; do esac done -if [[ -z $CRAN_DIST ]]; then - CRAN_DIST=$DIST -fi - if [[ -z $ARCHIVE ]]; then ARCHIVE=../r_base_${DIST}_${ARCH}.tar.gz fi @@ -126,8 +119,12 @@ cp $BQ_PATH/*.jar $BUILD_PATH/bigquery/ mkdir $BUILD_PATH/netezza/ cp 
$NETEZZA_PATH/*.jar $BUILD_PATH/netezza/ +# Hive drivers +mkdir $BUILD_PATH/hive/ +cp $HIVE_PATH/*.jar $BUILD_PATH/hive/ + sudo chmod +x $BUILD_PATH/root/install_packages.sh -sudo chroot $BUILD_PATH /root/install_packages.sh $CRAN_DIST +sudo chroot $BUILD_PATH /root/install_packages.sh $DIST umount $BUILD_PATH/proc sudo rm -f $BUILD_PATH/root/install_packages.sh diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/config/properties/HiveBulkLoadProperties.java b/src/main/java/com/odysseusinc/arachne/executionengine/config/properties/HiveBulkLoadProperties.java new file mode 100644 index 00000000..2afa3362 --- /dev/null +++ b/src/main/java/com/odysseusinc/arachne/executionengine/config/properties/HiveBulkLoadProperties.java @@ -0,0 +1,136 @@ +/* + * + * Copyright 2019 Odysseus Data Services, inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Company: Odysseus Data Services, Inc. 
+ * Product Owner/Architecture: Gregory Klebanov + * Authors: Pavel Grafkin, Vitaly Koulakov, Anastasiia Klochkova, Yaroslav Molodkov, Alexander Cumarav + * Created: November 1, 2019 + * + */ + +package com.odysseusinc.arachne.executionengine.config.properties; + +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +@ConfigurationProperties(prefix = "bulkload.hive") +@Component +public class HiveBulkLoadProperties { + private String host; + private SSHProperties ssh; + private HadoopProperties hadoop; + + public String getHost() { + + return host; + } + + public void setHost(String host) { + + this.host = host; + } + + public SSHProperties getSsh() { + + return ssh; + } + + public void setSsh(SSHProperties ssh) { + + this.ssh = ssh; + } + + public HadoopProperties getHadoop() { + + return hadoop; + } + + public void setHadoop(HadoopProperties hadoop) { + + this.hadoop = hadoop; + } + + public static class SSHProperties { + private Integer port; + private String username; + private String password; + private String keyfile; + + public Integer getPort() { + + return port; + } + + public void setPort(Integer port) { + + this.port = port; + } + + public String getUsername() { + + return username; + } + + public void setUsername(String username) { + + this.username = username; + } + + public String getPassword() { + + return password; + } + + public void setPassword(String password) { + + this.password = password; + } + + public String getKeyfile() { + + return keyfile; + } + + public void setKeyfile(String keyfile) { + + this.keyfile = keyfile; + } + } + + public static class HadoopProperties { + private Integer port; + private String username; + + public Integer getPort() { + + return port; + } + + public void setPort(Integer port) { + + this.port = port; + } + + public String getUsername() { + + return username; + } + + public void setUsername(String username) { + + this.username = username; 
+ } + } +} diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/VersionDetectionService.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/VersionDetectionService.java new file mode 100644 index 00000000..f359bd58 --- /dev/null +++ b/src/main/java/com/odysseusinc/arachne/executionengine/service/VersionDetectionService.java @@ -0,0 +1,33 @@ +/* + * + * Copyright 2019 Odysseus Data Services, inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Company: Odysseus Data Services, Inc. 
+ * Product Owner/Architecture: Gregory Klebanov + * Authors: Pavel Grafkin, Vitaly Koulakov, Anastasiia Klochkova, Yaroslav Molodkov, Alexander Cumarav + * Created: October 17, 2019 + * + */ + +package com.odysseusinc.arachne.executionengine.service; + +import com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO; +import com.odysseusinc.arachne.execution_engine_common.api.v1.dto.DataSourceUnsecuredDTO; +import java.io.IOException; +import java.sql.SQLException; + +public interface VersionDetectionService { + + CommonCDMVersionDTO detectCDMVersion(DataSourceUnsecuredDTO dataSource) throws SQLException, IOException; +} diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/VersionDetectionServiceFactory.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/VersionDetectionServiceFactory.java new file mode 100644 index 00000000..4fc2c068 --- /dev/null +++ b/src/main/java/com/odysseusinc/arachne/executionengine/service/VersionDetectionServiceFactory.java @@ -0,0 +1,30 @@ +/* + * + * Copyright 2019 Odysseus Data Services, inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Company: Odysseus Data Services, Inc. 
+ * Product Owner/Architecture: Gregory Klebanov + * Authors: Pavel Grafkin, Vitaly Koulakov, Anastasiia Klochkova, Yaroslav Molodkov, Alexander Cumarav + * Created: October 21, 2019 + * + */ + +package com.odysseusinc.arachne.executionengine.service; + +import com.odysseusinc.arachne.commons.types.DBMSType; + +public interface VersionDetectionServiceFactory { + + VersionDetectionService getService(DBMSType dbmsType); +} diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/AnalysisServiceImpl.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/AnalysisServiceImpl.java index cede83f3..70a1c03d 100644 --- a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/AnalysisServiceImpl.java +++ b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/AnalysisServiceImpl.java @@ -77,6 +77,8 @@ public class AnalysisServiceImpl implements AnalysisService, InitializingBean { private String bqDriversLocation; @Value("${drivers.location.netezza}") private String netezzaDriversLocation; + @Value("${drivers.location.hive}") + private String hiveDriversLocation; @Value("${submission.update.interval}") private int submissionUpdateInterval; @@ -129,7 +131,6 @@ public AnalysisRequestStatusDTO analyze(AnalysisSyncRequestDTO analysis, File an break; } } - String executableFileName = analysis.getExecutableFileName(); String fileExtension = Files.getFileExtension(executableFileName).toLowerCase(); @@ -215,7 +216,7 @@ private void saveMetadata(AnalysisSyncRequestDTO analysis, File toDir) { @Override public void afterPropertiesSet() throws Exception { - driverPathExclusions = Stream.of(impalaDriversLocation, bqDriversLocation, netezzaDriversLocation) + driverPathExclusions = Stream.of(impalaDriversLocation, bqDriversLocation, netezzaDriversLocation, hiveDriversLocation) .filter(StringUtils::isNotBlank) .map(path -> path.startsWith("/") ? 
path.substring(1) : path) .map(path -> path + "/**/*") diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/BaseVersionDetectionService.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/BaseVersionDetectionService.java new file mode 100644 index 00000000..71528cf3 --- /dev/null +++ b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/BaseVersionDetectionService.java @@ -0,0 +1,141 @@ +/* + * + * Copyright 2019 Odysseus Data Services, inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Company: Odysseus Data Services, Inc. 
+ * Product Owner/Architecture: Gregory Klebanov + * Authors: Pavel Grafkin, Vitaly Koulakov, Anastasiia Klochkova, Yaroslav Molodkov, Alexander Cumarav + * Created: October 21, 2019 + * + */ + +package com.odysseusinc.arachne.executionengine.service.impl; + +import static com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO.V4_0; +import static com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO.V5_0; +import static com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO.V5_0_1; +import static com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO.V5_1; +import static com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO.V5_2; +import static com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO.V5_3; +import static com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO.V5_3_1; +import static com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO.V6_0; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO; +import com.odysseusinc.arachne.executionengine.service.VersionDetectionService; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.function.BinaryOperator; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class BaseVersionDetectionService implements VersionDetectionService { + + protected static final Logger LOGGER = LoggerFactory.getLogger(VersionDetectionService.class); + + protected static final String CDM_V5_RESOURCES = "/cdm/v5/"; + protected static final String SCHEMA_TMPL = CDM_V5_RESOURCES + "diff_%s.json"; + protected static final String 
COMMONS_SCHEMA = CDM_V5_RESOURCES + "cdm_commons.json"; + protected static final String CDM_V4_SCHEMA = "/cdm/v4/cdm_V4_0.json"; + protected static final String CDM_V6_SCHEMA = "/cdm/v6/cdm_V6_0.json"; + private static final TypeReference>> TREE_MAP_TYPE_REFERENCE = new TypeReference>>() {}; + protected static Collection V5_VERSIONS = new ArrayList<>(6); + protected static Map OTHER_VERSIONS = new TreeMap<>(); + + static { + V5_VERSIONS.add(V5_3_1); + V5_VERSIONS.add(V5_3); + V5_VERSIONS.add(V5_2); + V5_VERSIONS.add(V5_1); + V5_VERSIONS.add(V5_0_1); + V5_VERSIONS.add(V5_0); + } + + static { + OTHER_VERSIONS.put(V6_0, CDM_V6_SCHEMA); + OTHER_VERSIONS.put(V4_0, CDM_V4_SCHEMA); + } + + protected CommonCDMVersionDTO doDetectVersion(Predicate>> schemaPredicate) throws IOException { + + CommonCDMVersionDTO result = null; + Map> commonsSchema = parseSchemaJson(COMMONS_SCHEMA); + if (schemaPredicate.test(commonsSchema)) { //checks is it V5 + for(CommonCDMVersionDTO version : V5_VERSIONS) { + Map> diff = parseSchemaJson(String.format(SCHEMA_TMPL, version.name())); + if (schemaPredicate.test(diff)) { + result = version; + break; + } + } + } else { + for(CommonCDMVersionDTO version : OTHER_VERSIONS.keySet()) { + Map> cdmSchema = parseSchemaJson(OTHER_VERSIONS.get(version)); + if (schemaPredicate.test(cdmSchema)) { + result = version; + break; + } + } + } + return result; + } + + protected Map> parseSchemaJson(String resource) throws IOException { + + ObjectMapper mapper = new ObjectMapper(); + try(Reader reader = new InputStreamReader(getClass().getResourceAsStream(resource))) { + return mapper.readValue(reader, TREE_MAP_TYPE_REFERENCE); + } + } + + protected final List diffTables(Map> schema1, Map> schema2) { + + Set schemaTables = schema2.keySet(); + return schema1.keySet().stream() + .filter(t -> !schemaTables.contains(t)) + .collect(Collectors.toList()); + } + + protected final Map> diffFields(Map> schema1, Map> schema2) { + + BinaryOperator> mergeFunction = (l1, l2) -> { 
+ l1.addAll(l2); + return l1; + }; + List tables = schema1.keySet().stream() + .filter(schema2::containsKey) + .collect(Collectors.toList()); + return tables.stream().map(t -> { + List cdmFields = schema1.get(t); + List dbFields = schema2.get(t); + List diffFieldsList = cdmFields.stream() + .filter(f -> !dbFields.contains(f)) + .collect(Collectors.toList()); + return Pair.of(t, diffFieldsList); + }) + .filter(v -> !v.getValue().isEmpty()) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue, mergeFunction, TreeMap::new)); + } +} diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/CdmMetadataServiceImpl.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/CdmMetadataServiceImpl.java index b7c5278b..5a3eaf8e 100644 --- a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/CdmMetadataServiceImpl.java +++ b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/CdmMetadataServiceImpl.java @@ -36,50 +36,38 @@ import static org.apache.commons.lang3.StringUtils.defaultString; import com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO; -import com.odysseusinc.arachne.execution_engine_common.api.v1.dto.AnalysisRequestDTO; import com.odysseusinc.arachne.execution_engine_common.api.v1.dto.DataSourceUnsecuredDTO; import com.odysseusinc.arachne.executionengine.aspect.FileDescriptorCount; import com.odysseusinc.arachne.executionengine.model.CdmSource; import com.odysseusinc.arachne.executionengine.model.Vocabulary; import com.odysseusinc.arachne.executionengine.service.CdmMetadataService; +import com.odysseusinc.arachne.executionengine.service.VersionDetectionServiceFactory; import com.odysseusinc.arachne.executionengine.service.sql.SqlMetadataService; import com.odysseusinc.arachne.executionengine.service.sql.SqlMetadataServiceFactory; -import com.odysseusinc.arachne.executionengine.util.SQLUtils; -import com.odysseusinc.arachne.executionengine.util.exception.StatementSQLException; +import 
com.odysseusinc.arachne.executionengine.util.DateUtil; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.UncheckedIOException; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; import java.sql.SQLException; +import java.time.Duration; +import java.time.LocalDateTime; import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Objects; +import java.util.Optional; import java.util.Properties; import java.util.TreeSet; -import java.util.concurrent.ConcurrentHashMap; -import org.apache.commons.io.IOUtils; +import java.util.concurrent.Callable; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.text.StrSubstitutor; -import org.ohdsi.sql.SqlRender; -import org.ohdsi.sql.SqlSplit; -import org.ohdsi.sql.SqlTranslate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.core.io.Resource; -import org.springframework.core.io.ResourceLoader; import org.springframework.stereotype.Service; -import org.springframework.util.ResourceUtils; @Service public class CdmMetadataServiceImpl implements CdmMetadataService { @@ -87,12 +75,8 @@ public class CdmMetadataServiceImpl implements CdmMetadataService { private static final String COMMENT = "CDM database ${database}"; private static final String PROPERTIES_FILE_NAME = "cdm_version.txt"; private static final Logger LOGGER = LoggerFactory.getLogger(CdmMetadataService.class); - private final static String RES_TABLE_CHECK_V4 = "/cdm/v4/tableCheck.sql"; - private final static String RES_TABLE_CHECK_V5 = "/cdm/v5/tableCheck_%s.sql"; private final static CommonCDMVersionDTO V_5_INIT = CommonCDMVersionDTO.V5_0; private final static List 
V5_VERSIONS = new ArrayList<>(); - public static final String VAR_CDM_SCHEMA = "cdm_schema"; - public final static ConcurrentHashMap detectorSqlMap = new ConcurrentHashMap<>(); static { V5_VERSIONS.add(CommonCDMVersionDTO.V5_3_1); @@ -104,15 +88,15 @@ public class CdmMetadataServiceImpl implements CdmMetadataService { } private final SqlMetadataServiceFactory sqlMetadataServiceFactory; - private final ResourceLoader resourceLoader; - private final String REGEX_V5 = "^V5.*"; + private final VersionDetectionServiceFactory versionDetectionServiceFactory; + private final String REGEX_V5 = "^V(5+|6+)_.*"; @Autowired public CdmMetadataServiceImpl(SqlMetadataServiceFactory sqlMetadataServiceFactory, - ResourceLoader resourceLoader) { + VersionDetectionServiceFactory versionDetectionServiceFactory) { this.sqlMetadataServiceFactory = sqlMetadataServiceFactory; - this.resourceLoader = resourceLoader; + this.versionDetectionServiceFactory = versionDetectionServiceFactory; } @Override @@ -121,8 +105,26 @@ public void extractMetadata(DataSourceUnsecuredDTO dataSource, File dir) throws try { SqlMetadataService metadataService = sqlMetadataServiceFactory.getMetadataService(dataSource); - String cdmVersion = detectCdmVersion(dataSource, metadataService); - List vocabularies = metadataService.getVocabularyVersions(cdmVersion); + String cdmVersion; + try { + cdmVersion = logTime(String.format("[%s] CDM Version detection", dataSource.getType()), + () -> detectCdmVersion(dataSource)); + } catch (Exception e) { + LOGGER.error("Failed to detect CDM Version, {}", e.getMessage()); + cdmVersion = ""; + } + if (LOGGER.isDebugEnabled()) { + LOGGER.debug(String.format("[%s] CDM version: %s", dataSource.getType(), + StringUtils.isNotBlank(cdmVersion) ? 
cdmVersion : "not detected")); + } + List vocabularies = Collections.emptyList(); + final String cdm = cdmVersion; + try { + vocabularies = logTime(String.format("[%s] vocabulary versions resolving", dataSource.getType()), + () -> metadataService.getVocabularyVersions(cdm)); + } catch (Exception e) { + LOGGER.error("Failed to get metadata, {}", e.getMessage()); + } Properties properties = new Properties() { @Override @@ -183,66 +185,28 @@ private String composeComment(DataSourceUnsecuredDTO dataSource) { return new StrSubstitutor(values).replace(COMMENT); } - private String detectCdmVersion(DataSourceUnsecuredDTO dataSource, SqlMetadataService metadataService) throws SQLException { + private String detectCdmVersion(DataSourceUnsecuredDTO dataSource) throws SQLException { - CommonCDMVersionDTO version = null; + Optional version; try { - for (CommonCDMVersionDTO v : V5_VERSIONS) { - try { - checkCdmTables(dataSource, RES_TABLE_CHECK_V5, v.name()); - version = v; - if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Detected CDM version for {} is {}", dataSource.getConnectionStringForLogging(), version); - } - break; - } catch (StatementSQLException e) { - LOGGER.debug("Failed CDM version check for {} as {} with message: {},\nstatement: {}", dataSource.getConnectionStringForLogging(), v, e.getMessage(), e.getStatement()); - } catch (SQLException e) { - LOGGER.debug("Failed CDM version check for {} as {} with message: {}", dataSource.getConnectionStringForLogging(), v, e.getMessage()); - } - } - if (Objects.isNull(version)) { - checkCdmTables(dataSource, RES_TABLE_CHECK_V4, ""); - } + version = Optional.ofNullable(versionDetectionServiceFactory.getService(dataSource.getType()) + .detectCDMVersion(dataSource)); } catch (IOException e) { LOGGER.error("Failed to determine CDM version", e); - version = null; + version = Optional.empty(); } - return Objects.isNull(version) ? 
null : version.name(); + return version.map(CommonCDMVersionDTO::name).orElse(""); } - private void checkCdmTables(DataSourceUnsecuredDTO dataSource, String pattern, String version) throws SQLException, IOException { - - String sql = detectorSqlMap.computeIfAbsent( - Objects.hash(dataSource.getType().getOhdsiDB(), pattern, version), - (key) -> { - Resource queryFile = resourceLoader.getResource(ResourceUtils.CLASSPATH_URL_PREFIX + String.format(pattern, version)); - try (Reader r = new InputStreamReader(queryFile.getInputStream())) { - return SqlTranslate.translateSql(IOUtils.toString(r), dataSource.getType().getOhdsiDB()); - } catch (IOException ex) { - throw new UncheckedIOException(ex); - } - } - ); - - String[] params = new String[]{VAR_CDM_SCHEMA}; - String[] values = new String[]{dataSource.getCdmSchema()}; - sql = SqlRender.renderSql(sql, params, values); - - String[] statements = SqlSplit.splitSql(sql); - - try (Connection c = SQLUtils.getConnection(dataSource)) { - for (String query : statements) { - if (StringUtils.isNotBlank(query)) { - try (PreparedStatement stmt = c.prepareStatement(query)) { - stmt.setMaxRows(1); - try (final ResultSet resultSet = stmt.executeQuery()) { - } - } catch (SQLException e) { - throw new StatementSQLException(e.getMessage(), e, query); - } - } - } + private V logTime(String actionName, Callable statement) throws Exception { + + LocalDateTime start = LocalDateTime.now(); + try{ + return statement.call(); + } finally { + LocalDateTime finish = LocalDateTime.now(); + Duration timeElapsed = Duration.between(start, finish); + LOGGER.debug(String.format("Execution of %s took: %s", actionName, DateUtil.formatDuration(timeElapsed))); } } diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/DefaultVersionDetectionService.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/DefaultVersionDetectionService.java new file mode 100644 index 00000000..6c809983 --- /dev/null +++ 
b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/DefaultVersionDetectionService.java @@ -0,0 +1,141 @@ +/* + * + * Copyright 2019 Odysseus Data Services, inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Company: Odysseus Data Services, Inc. + * Product Owner/Architecture: Gregory Klebanov + * Authors: Pavel Grafkin, Vitaly Koulakov, Anastasiia Klochkova, Yaroslav Molodkov, Alexander Cumarav + * Created: October 17, 2019 + * + */ + +package com.odysseusinc.arachne.executionengine.service.impl; + +import com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO; +import com.odysseusinc.arachne.execution_engine_common.api.v1.dto.DataSourceUnsecuredDTO; +import com.odysseusinc.arachne.executionengine.service.VersionDetectionService; +import com.odysseusinc.arachne.executionengine.util.SQLUtils; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TreeMap; +import java.util.function.Function; +import org.springframework.stereotype.Service; + +@Service +public class DefaultVersionDetectionService extends BaseVersionDetectionService implements VersionDetectionService { + + @Override + public CommonCDMVersionDTO detectCDMVersion(DataSourceUnsecuredDTO dataSource) throws SQLException, 
IOException { + + Map> databaseSchema = extractMetadata(dataSource); + CommonCDMVersionDTO result = doDetectVersion(schema -> isSchemaIncludedBy(schema, databaseSchema)); + + // Log warnings around version detection process, + // might be useful for db masters + if (Objects.isNull(result) && LOGGER.isDebugEnabled()) { + + LOGGER.debug("CDM version was not detected on datasource: [{}]", dataSource.getName()); + Map> commonsSchema = parseSchemaJson(COMMONS_SCHEMA); + if (isSchemaIncludedBy(commonsSchema, databaseSchema)) { + showCDMCheckWarnings(V5_VERSIONS, databaseSchema, ver -> { + try { + return parseSchemaJson(String.format(SCHEMA_TMPL, ver.name())); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } else { + showCDMCheckWarnings(OTHER_VERSIONS.keySet(), databaseSchema, ver -> { + try { + return parseSchemaJson(OTHER_VERSIONS.get(ver)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + } + + return result; + } + + /** + * Checks if schema1 included into schema2 + * @param schema1 + * @param schema2 + * @return true when schema2 includes schema1 + */ + private boolean isSchemaIncludedBy(Map> schema1, + Map> schema2) { + + boolean tablesIncluded = schema2.keySet().containsAll(schema1.keySet()); + return tablesIncluded && schema1.keySet().stream() + .allMatch(t -> schema2.get(t).containsAll(schema1.get(t))); + } + + private Map> extractMetadata(DataSourceUnsecuredDTO dataSource) throws SQLException { + + Map> metadataMap = new TreeMap<>(); + final String schema = dataSource.getCdmSchema(); + try(Connection c = SQLUtils.getConnection(dataSource)) { + DatabaseMetaData metaData = c.getMetaData(); + + try(ResultSet columns = metaData.getColumns(null, schema, "%", "%")){ + while (columns.next()) { + String tableName = columns.getString("TABLE_NAME").toLowerCase(); + String columnName = columns.getString("COLUMN_NAME").toLowerCase(); + List tableColumns = metadataMap.getOrDefault(tableName, new ArrayList<>()); + 
tableColumns.add(columnName); + metadataMap.putIfAbsent(tableName, tableColumns); + } + } + } + return metadataMap; + } + + private void showCDMCheckWarnings(Collection versions, + Map> databaseSchema, + Function>> schemaFunc) throws IOException { + + for(CommonCDMVersionDTO ver : versions){ + Map> diff = schemaFunc.apply(ver); + showSchemaCheckWarnings(diff, databaseSchema, ver); + } + } + + private void showSchemaCheckWarnings(Map> schema, + Map> databaseSchema, + CommonCDMVersionDTO version) throws IOException { + + List missedTables = diffTables(schema, databaseSchema); + if (!missedTables.isEmpty()) { + LOGGER.debug("[{}] Database missed tables: {}", version, + String.join(", ", missedTables)); + } + Map> missedFields = diffFields(schema, databaseSchema); + if (!missedFields.isEmpty()) { + missedFields.keySet().forEach(t -> { + LOGGER.debug("[{}] Database table {} missed fields: {}", version, t, + String.join(", ", missedFields.get(t))); + }); + } + } +} diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/ImpalaVersionDetectionService.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/ImpalaVersionDetectionService.java new file mode 100644 index 00000000..f3389ea0 --- /dev/null +++ b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/ImpalaVersionDetectionService.java @@ -0,0 +1,88 @@ +/* + * + * Copyright 2019 Odysseus Data Services, inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Company: Odysseus Data Services, Inc. + * Product Owner/Architecture: Gregory Klebanov + * Authors: Pavel Grafkin, Vitaly Koulakov, Anastasiia Klochkova, Yaroslav Molodkov, Alexander Cumarav + * Created: October 21, 2019 + * + */ + +package com.odysseusinc.arachne.executionengine.service.impl; + +import com.odysseusinc.arachne.commons.types.CommonCDMVersionDTO; +import com.odysseusinc.arachne.commons.types.DBMSType; +import com.odysseusinc.arachne.execution_engine_common.api.v1.dto.DataSourceUnsecuredDTO; +import com.odysseusinc.arachne.executionengine.service.VersionDetectionService; +import com.odysseusinc.arachne.executionengine.util.SQLUtils; +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.ohdsi.sql.SqlRender; +import org.ohdsi.sql.SqlTranslate; +import org.springframework.stereotype.Service; + +@Service +public class ImpalaVersionDetectionService extends BaseVersionDetectionService implements VersionDetectionService { + + private static String[] CTE_PARAMS = new String[]{ "table", "cdmDatabaseSchema", "fields" }; + + @Override + public CommonCDMVersionDTO detectCDMVersion(DataSourceUnsecuredDTO dataSource) throws SQLException, IOException { + + return doDetectVersion(schema -> { + try { + return checkSchema(dataSource, schema); + } catch (SQLException e) { + throw new RuntimeException(e); + } + }); + } + + private boolean checkSchema(DataSourceUnsecuredDTO dataSource, Map> schema) throws SQLException { + + String cteSql = schema.keySet().stream() + .map(tbl -> { + String fields = schema.get(tbl).stream().map(f -> String.format("`%s`", f)).collect(Collectors.joining(",")); + String[] values = new String[]{ tbl, dataSource.getCdmSchema(), fields }; + String sql = "cte_@table as (select cast('@table' as varchar(50)) as tablename from (select top 1 @fields from 
@cdmDatabaseSchema.`@table`) as `@table`)"; + return SqlRender.renderSql(sql, CTE_PARAMS, values); + }).collect(Collectors.joining(",")); + String cteAll = schema.keySet().stream() + .map(tbl -> { + String[] values = new String[] { tbl }; + return SqlRender.renderSql("select tablename from cte_@table", new String[]{ "table" }, values); + }).collect(Collectors.joining(" union all ")); + String[] values = new String[]{ cteSql, cteAll }; + String sql = SqlRender.renderSql("with @cteTables, cte_all as (@cteAll) select tablename from cte_all;", + new String[]{ "cteTables", "cteAll" }, values); + sql = SqlTranslate.translateSql(sql, DBMSType.IMPALA.getOhdsiDB()); + try(Connection c = SQLUtils.getConnection(dataSource)) { + try { + PreparedStatement query = c.prepareStatement(sql); + query.executeQuery(); + }catch (SQLException e) { + LOGGER.debug("DBMS: {} detection error: {}", dataSource.getType(), e.getMessage()); + return false; + } + } + return true; + } + +} diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/RuntimeServiceImpl.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/RuntimeServiceImpl.java index 426c9f8b..85a23243 100644 --- a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/RuntimeServiceImpl.java +++ b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/RuntimeServiceImpl.java @@ -30,6 +30,7 @@ import com.odysseusinc.arachne.execution_engine_common.api.v1.dto.DataSourceUnsecuredDTO; import com.odysseusinc.arachne.execution_engine_common.util.BigQueryUtils; import com.odysseusinc.arachne.executionengine.aspect.FileDescriptorCount; +import com.odysseusinc.arachne.executionengine.config.properties.HiveBulkLoadProperties; import com.odysseusinc.arachne.executionengine.config.runtimeservice.RIsolatedRuntimeProperties; import com.odysseusinc.arachne.executionengine.service.CallbackService; import com.odysseusinc.arachne.executionengine.service.RuntimeService; @@ -120,16 
+121,26 @@ public class RuntimeServiceImpl implements RuntimeService { private String bqDriversLocation; @Value("${drivers.location.netezza}") private String netezzaDriversLocation; + @Value("${drivers.location.hive}") + private String hiveDriversLocation; + @Value("${bulkload.enableMPP}") + private Boolean enableMPP; + private final HiveBulkLoadProperties hiveBulkLoadProperties; private RIsolatedRuntimeProperties rIsolatedRuntimeProps; @Autowired - public RuntimeServiceImpl(ThreadPoolTaskExecutor taskExecutor, CallbackService callbackService, ResourceLoader resourceLoader, RIsolatedRuntimeProperties rIsolatedRuntimeProps) { + public RuntimeServiceImpl(ThreadPoolTaskExecutor taskExecutor, + CallbackService callbackService, + ResourceLoader resourceLoader, + HiveBulkLoadProperties hiveBulkLoadProperties, + RIsolatedRuntimeProperties rIsolatedRuntimeProps) { this.taskExecutor = taskExecutor; this.callbackService = callbackService; this.resourceLoader = resourceLoader; + this.hiveBulkLoadProperties = hiveBulkLoadProperties; this.rIsolatedRuntimeProps = rIsolatedRuntimeProps; } @@ -284,10 +295,30 @@ private Map buildRuntimeEnvVariables(DataSourceUnsecuredDTO data environment.put(RUNTIME_ENV_LANG_KEY, RUNTIME_ENV_LANG_VALUE); environment.put(RUNTIME_ENV_LC_ALL_KEY, RUNTIME_ENV_LC_ALL_VALUE); + if (enableMPP) { + exposeMPPEnvironmentVariables(environment); + } + environment.values().removeIf(Objects::isNull); return environment; } + private void exposeMPPEnvironmentVariables(Map environment) { + + environment.put("USE_MPP_BULK_LOAD", Boolean.toString(enableMPP)); + environment.put("HIVE_NODE_HOST", hiveBulkLoadProperties.getHost()); + environment.put("HIVE_SSH_USER", hiveBulkLoadProperties.getSsh().getUsername()); + environment.put("HIVE_SSH_PORT", Integer.toString(hiveBulkLoadProperties.getSsh().getPort())); + environment.put("HIVE_SSH_PASSWORD", hiveBulkLoadProperties.getSsh().getPassword()); + if (StringUtils.isNotBlank(hiveBulkLoadProperties.getSsh().getKeyfile())) { 
+ environment.put("HIVE_KEYFILE", hiveBulkLoadProperties.getSsh().getKeyfile()); + } + if (StringUtils.isNotBlank(hiveBulkLoadProperties.getHadoop().getUsername())) { + environment.put("HADOOP_USER_NAME", hiveBulkLoadProperties.getHadoop().getUsername()); + } + environment.put("HIVE_NODE_PORT", Integer.toString(hiveBulkLoadProperties.getHadoop().getPort())); + } + private String getUserHome() { String userHome = System.getProperty("user.home"); return StringUtils.defaultString(userHome, RUNTIME_ENV_HOME_VALUE); @@ -308,6 +339,8 @@ private String getDriversPath(DataSourceUnsecuredDTO dataSource) { return bqDriversLocation; case NETEZZA: return netezzaDriversLocation; + case HIVE: + return hiveDriversLocation; default: return null; } diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/VersionDetectionServiceFactoryImpl.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/VersionDetectionServiceFactoryImpl.java new file mode 100644 index 00000000..31bd6a80 --- /dev/null +++ b/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/VersionDetectionServiceFactoryImpl.java @@ -0,0 +1,51 @@ +/* + * + * Copyright 2019 Odysseus Data Services, inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Company: Odysseus Data Services, Inc. 
+ * Product Owner/Architecture: Gregory Klebanov + * Authors: Pavel Grafkin, Vitaly Koulakov, Anastasiia Klochkova, Yaroslav Molodkov, Alexander Cumarav + * Created: October 21, 2019 + * + */ + +package com.odysseusinc.arachne.executionengine.service.impl; + +import com.odysseusinc.arachne.commons.types.DBMSType; +import com.odysseusinc.arachne.executionengine.service.VersionDetectionService; +import com.odysseusinc.arachne.executionengine.service.VersionDetectionServiceFactory; +import java.util.HashMap; +import java.util.Map; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.stereotype.Service; + +@Service +public class VersionDetectionServiceFactoryImpl implements VersionDetectionServiceFactory { + + private final VersionDetectionService versionDetectionService; + private Map SERVICE_MAP = new HashMap<>(); + + public VersionDetectionServiceFactoryImpl(DefaultVersionDetectionService versionDetectionService, + @Qualifier("impalaVersionDetectionService") VersionDetectionService impalaVersionDetectionService) { + + this.versionDetectionService = versionDetectionService; + SERVICE_MAP.put(DBMSType.IMPALA, impalaVersionDetectionService); + } + + @Override + public VersionDetectionService getService(DBMSType dbmsType) { + + return SERVICE_MAP.getOrDefault(dbmsType, versionDetectionService); + } +} diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/service/sql/SqlMetadataServiceFactory.java b/src/main/java/com/odysseusinc/arachne/executionengine/service/sql/SqlMetadataServiceFactory.java index 5d393283..dd51e689 100644 --- a/src/main/java/com/odysseusinc/arachne/executionengine/service/sql/SqlMetadataServiceFactory.java +++ b/src/main/java/com/odysseusinc/arachne/executionengine/service/sql/SqlMetadataServiceFactory.java @@ -52,6 +52,7 @@ public SqlMetadataService getMetadataService(DataSourceUnsecuredDTO dataSource) break; case IMPALA: case BIGQUERY: + case HIVE: result = new ImpalaMetadataService(dataSource); 
break; case NETEZZA: diff --git a/src/main/java/com/odysseusinc/arachne/executionengine/util/DateUtil.java b/src/main/java/com/odysseusinc/arachne/executionengine/util/DateUtil.java index 9f26387d..5632046b 100644 --- a/src/main/java/com/odysseusinc/arachne/executionengine/util/DateUtil.java +++ b/src/main/java/com/odysseusinc/arachne/executionengine/util/DateUtil.java @@ -22,7 +22,9 @@ package com.odysseusinc.arachne.executionengine.util; +import java.time.Duration; import java.util.Date; +import org.apache.commons.lang3.time.DurationFormatUtils; public class DateUtil { @@ -30,4 +32,10 @@ public static String defaultFormat(String format, Date date) { return date != null ? String.format(format, date) : ""; } + + public static String formatDuration(Duration duration) { + + return duration.toMillis() < 1000 ? String.format("%d ms", duration.toMillis()) : + DurationFormatUtils.formatDurationWords(duration.toMillis(), true, true); + } } diff --git a/src/main/resources/application-base.yml b/src/main/resources/application-base.yml index 25c25e76..8cd1eb7a 100644 --- a/src/main/resources/application-base.yml +++ b/src/main/resources/application-base.yml @@ -10,15 +10,18 @@ server: port: 8888 logging: - level: info + level: + root: info descriptor: count: enabled: false spring: - http: + jmx: + unique-names: true + servlet: multipart: - max-request-size: 256MB max-file-size: 256MB + max-request-size: 256MB executor: corePoolSize: 4 @@ -46,15 +49,16 @@ swagger: csv: separator: ',' -endpoints: - jolokia: - enabled: true - path: /jolokia - jmx: - unique-names: true - management: - port: 9999 + endpoint: + jolokia: + enabled: true + server: + port: 9999 + endpoints: + web: + path-mapping: + jolokia: /jolokia runtimeservice: dist: @@ -76,3 +80,16 @@ drivers: impala: /impala bq: /bigquery netezza: /netezza + hive: /hive +bulkload: + enableMPP: false + hive: + host: + ssh: + port: 2222 + username: + password: + keyfile: + hadoop: + port: 8020 + username: diff --git 
a/src/main/resources/application-dev.yml b/src/main/resources/application-dev.yml index 796f384d..a840d956 100644 --- a/src/main/resources/application-dev.yml +++ b/src/main/resources/application-dev.yml @@ -1,5 +1,6 @@ logging: - level: debug + level: + root: debug executor: queueCapacity: 200 submission: diff --git a/src/main/resources/cdm/v4/cdm_V4_0.json b/src/main/resources/cdm/v4/cdm_V4_0.json new file mode 100644 index 00000000..f1166357 --- /dev/null +++ b/src/main/resources/cdm/v4/cdm_V4_0.json @@ -0,0 +1,10 @@ +{ + "concept" : [ "concept_id", "concept_name", "concept_level", "concept_class", "vocabulary_id", "concept_code", "valid_start_date", "valid_end_date", "invalid_reason" ], + "concept_ancestor" : [ "ancestor_concept_id", "descendant_concept_id", "min_levels_of_separation", "max_levels_of_separation" ], + "concept_relationship" : [ "concept_id_1", "concept_id_2", "relationship_id", "valid_start_date", "valid_end_date", "invalid_reason" ], + "concept_synonym" : [ "concept_synonym_id", "concept_id", "concept_synonym_name" ], + "drug_strength" : [ "drug_concept_id", "ingredient_concept_id", "amount_value", "amount_unit", "concentration_value", "concentration_enum_unit", "concentration_denom_unit", "valid_start_date", "valid_end_date", "invalid_reason" ], + "relationship" : [ "relationship_id", "relationship_name", "is_hierarchical", "defines_ancestry", "reverse_relationship" ], + "source_to_concept_map" : [ "source_code", "source_vocabulary_id", "source_code_description", "target_concept_id", "target_vocabulary_id", "mapping_type", "primary_map", "valid_start_date", "valid_end_date", "invalid_reason" ], + "vocabulary" : [ "vocabulary_id", "vocabulary_name" ] +} \ No newline at end of file diff --git a/src/main/resources/cdm/v4/tableCheck.sql b/src/main/resources/cdm/v4/tableCheck.sql deleted file mode 100644 index a7301094..00000000 --- a/src/main/resources/cdm/v4/tableCheck.sql +++ /dev/null @@ -1,16 +0,0 @@ -SELECT concept_id, concept_name, 
concept_level, concept_class, vocabulary_id, concept_code, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept; -SELECT ancestor_concept_id, descendant_concept_id, min_levels_of_separation, max_levels_of_separation -FROM @cdm_schema.concept_ancestor; -SELECT concept_id_1, concept_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept_relationship; -SELECT concept_synonym_id, concept_id, concept_synonym_name -FROM @cdm_schema.concept_synonym; -SELECT drug_concept_id, ingredient_concept_id, amount_value, amount_unit, concentration_value, concentration_enum_unit, concentration_denom_unit, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.drug_strength; -SELECT relationship_id, relationship_name, is_hierarchical, defines_ancestry, reverse_relationship -FROM @cdm_schema.relationship; -SELECT source_code, source_vocabulary_id, source_code_description, target_concept_id, target_vocabulary_id, mapping_type, primary_map, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.source_to_concept_map; -SELECT vocabulary_id, vocabulary_name -FROM @cdm_schema.vocabulary; diff --git a/src/main/resources/cdm/v5/cdm_commons.json b/src/main/resources/cdm/v5/cdm_commons.json new file mode 100644 index 00000000..57583c8a --- /dev/null +++ b/src/main/resources/cdm/v5/cdm_commons.json @@ -0,0 +1,37 @@ +{ + "attribute_definition" : [ "attribute_definition_id", "attribute_name", "attribute_description", "attribute_type_concept_id", "attribute_syntax" ], + "care_site" : [ "care_site_id", "care_site_name", "place_of_service_concept_id", "location_id", "care_site_source_value", "place_of_service_source_value" ], + "cdm_source" : [ "cdm_source_name", "cdm_source_abbreviation", "cdm_holder", "source_description", "source_documentation_reference", "cdm_etl_reference", "source_release_date", "cdm_release_date", "cdm_version", "vocabulary_version" ], + "cohort" : [ "cohort_definition_id", "subject_id", 
"cohort_start_date", "cohort_end_date" ], + "cohort_attribute" : [ "cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date", "attribute_definition_id", "value_as_number", "value_as_concept_id" ], + "cohort_definition" : [ "cohort_definition_id", "cohort_definition_name", "cohort_definition_description", "definition_type_concept_id", "cohort_definition_syntax", "subject_concept_id", "cohort_initiation_date" ], + "concept" : [ "concept_id", "concept_name", "domain_id", "vocabulary_id", "concept_class_id", "standard_concept", "concept_code", "valid_start_date", "valid_end_date", "invalid_reason" ], + "concept_ancestor" : [ "ancestor_concept_id", "descendant_concept_id", "min_levels_of_separation", "max_levels_of_separation" ], + "concept_class" : [ "concept_class_id", "concept_class_name", "concept_class_concept_id" ], + "concept_relationship" : [ "concept_id_1", "concept_id_2", "relationship_id", "valid_start_date", "valid_end_date", "invalid_reason" ], + "concept_synonym" : [ "concept_id", "concept_synonym_name", "language_concept_id" ], + "condition_era" : [ "condition_era_id", "person_id", "condition_concept_id", "condition_era_start_date", "condition_era_end_date", "condition_occurrence_count" ], + "condition_occurrence" : [ "condition_occurrence_id", "person_id", "condition_concept_id", "condition_start_date", "condition_end_date", "condition_type_concept_id", "stop_reason", "provider_id", "visit_occurrence_id", "condition_source_value", "condition_source_concept_id" ], + "death" : [ "person_id", "death_date", "death_type_concept_id", "cause_concept_id", "cause_source_value", "cause_source_concept_id" ], + "device_exposure" : [ "device_exposure_id", "person_id", "device_concept_id", "device_exposure_start_date", "device_exposure_end_date", "device_type_concept_id", "unique_device_id", "quantity", "provider_id", "visit_occurrence_id", "device_source_value", "device_source_concept_id" ], + "domain" : [ "domain_id", "domain_name", 
"domain_concept_id" ], + "dose_era" : [ "dose_era_id", "person_id", "drug_concept_id", "unit_concept_id", "dose_value", "dose_era_start_date", "dose_era_end_date" ], + "drug_era" : [ "drug_era_id", "person_id", "drug_concept_id", "drug_era_start_date", "drug_era_end_date", "drug_exposure_count", "gap_days" ], + "drug_exposure" : [ "drug_exposure_id", "person_id", "drug_concept_id", "drug_exposure_start_date", "drug_exposure_end_date", "drug_type_concept_id", "stop_reason", "refills", "quantity", "days_supply", "sig", "route_concept_id", "lot_number", "provider_id", "visit_occurrence_id", "drug_source_value", "drug_source_concept_id", "route_source_value", "dose_unit_source_value" ], + "drug_strength" : [ "drug_concept_id", "ingredient_concept_id", "amount_value", "amount_unit_concept_id", "numerator_value", "numerator_unit_concept_id", "denominator_unit_concept_id", "valid_start_date", "valid_end_date", "invalid_reason" ], + "fact_relationship" : [ "domain_concept_id_1", "fact_id_1", "domain_concept_id_2", "fact_id_2", "relationship_concept_id" ], + "location" : [ "location_id", "address_1", "address_2", "city", "state", "zip", "county", "location_source_value" ], + "measurement" : [ "measurement_id", "person_id", "measurement_concept_id", "measurement_date", "measurement_type_concept_id", "operator_concept_id", "value_as_number", "value_as_concept_id", "unit_concept_id", "range_low", "range_high", "provider_id", "visit_occurrence_id", "measurement_source_value", "measurement_source_concept_id", "unit_source_value", "value_source_value" ], + "note" : [ "note_id", "person_id", "note_date", "note_type_concept_id", "note_text", "provider_id", "visit_occurrence_id", "note_source_value" ], + "observation" : [ "observation_id", "person_id", "observation_concept_id", "observation_date", "observation_type_concept_id", "value_as_number", "value_as_string", "value_as_concept_id", "qualifier_concept_id", "unit_concept_id", "provider_id", "visit_occurrence_id", 
"observation_source_value", "observation_source_concept_id", "unit_source_value", "qualifier_source_value" ], + "observation_period" : [ "observation_period_id", "person_id", "observation_period_start_date", "observation_period_end_date", "period_type_concept_id" ], + "payer_plan_period" : [ "payer_plan_period_id", "person_id", "payer_plan_period_start_date", "payer_plan_period_end_date", "payer_source_value", "plan_source_value", "family_source_value" ], + "person" : [ "person_id", "gender_concept_id", "year_of_birth", "month_of_birth", "day_of_birth", "race_concept_id", "ethnicity_concept_id", "location_id", "provider_id", "care_site_id", "person_source_value", "gender_source_value", "gender_source_concept_id", "race_source_value", "race_source_concept_id", "ethnicity_source_value", "ethnicity_source_concept_id" ], + "procedure_occurrence" : [ "procedure_occurrence_id", "person_id", "procedure_concept_id", "procedure_date", "procedure_type_concept_id", "modifier_concept_id", "quantity", "provider_id", "visit_occurrence_id", "procedure_source_value", "procedure_source_concept_id" ], + "provider" : [ "provider_id", "provider_name", "npi", "dea", "specialty_concept_id", "care_site_id", "year_of_birth", "gender_concept_id", "provider_source_value", "specialty_source_value", "specialty_source_concept_id", "gender_source_value", "gender_source_concept_id" ], + "relationship" : [ "relationship_id", "relationship_name", "is_hierarchical", "defines_ancestry", "reverse_relationship_id", "relationship_concept_id" ], + "source_to_concept_map" : [ "source_code", "source_concept_id", "source_vocabulary_id", "source_code_description", "target_concept_id", "target_vocabulary_id", "valid_start_date", "valid_end_date", "invalid_reason" ], + "specimen" : [ "specimen_id", "person_id", "specimen_concept_id", "specimen_type_concept_id", "specimen_date", "quantity", "unit_concept_id", "anatomic_site_concept_id", "disease_status_concept_id", "specimen_source_id", 
"specimen_source_value", "unit_source_value", "anatomic_site_source_value", "disease_status_source_value" ], + "visit_occurrence" : [ "visit_occurrence_id", "person_id", "visit_concept_id", "visit_start_date", "visit_end_date", "visit_type_concept_id", "provider_id", "care_site_id", "visit_source_value", "visit_source_concept_id" ], + "vocabulary" : [ "vocabulary_id", "vocabulary_name", "vocabulary_reference", "vocabulary_version", "vocabulary_concept_id" ] +} \ No newline at end of file diff --git a/src/main/resources/cdm/v5/diff_V5_0.json b/src/main/resources/cdm/v5/diff_V5_0.json new file mode 100644 index 00000000..a62974f4 --- /dev/null +++ b/src/main/resources/cdm/v5/diff_V5_0.json @@ -0,0 +1,14 @@ +{ + "device_cost" : [ "device_cost_id", "device_exposure_id", "currency_concept_id", "paid_copay", "paid_coinsurance", "paid_toward_deductible", "paid_by_payer", "paid_by_coordination_benefits", "total_out_of_pocket", "total_paid", "payer_plan_period_id" ], + "drug_cost" : [ "drug_cost_id", "drug_exposure_id", "currency_concept_id", "paid_copay", "paid_coinsurance", "paid_toward_deductible", "paid_by_payer", "paid_by_coordination_benefits", "total_out_of_pocket", "total_paid", "ingredient_cost", "dispensing_fee", "average_wholesale_price", "payer_plan_period_id" ], + "drug_exposure" : [ "effective_drug_dose", "dose_unit_concept_id" ], + "measurement" : [ "measurement_time" ], + "note" : [ "note_time" ], + "observation" : [ "observation_time" ], + "person" : [ "time_of_birth" ], + "procedure_cost" : [ "procedure_cost_id", "procedure_occurrence_id", "currency_concept_id", "paid_copay", "paid_coinsurance", "paid_toward_deductible", "paid_by_payer", "paid_by_coordination_benefits", "total_out_of_pocket", "total_paid", "revenue_code_concept_id", "payer_plan_period_id", "revenue_code_source_value" ], + "procedure_occurrence" : [ "qualifier_source_value" ], + "specimen" : [ "specimen_time" ], + "visit_cost" : [ "visit_cost_id", "visit_occurrence_id", 
"currency_concept_id", "paid_copay", "paid_coinsurance", "paid_toward_deductible", "paid_by_payer", "paid_by_coordination_benefits", "total_out_of_pocket", "total_paid", "payer_plan_period_id" ], + "visit_occurrence" : [ "visit_start_time", "visit_end_time" ] +} \ No newline at end of file diff --git a/src/main/resources/cdm/v5/diff_V5_0_1.json b/src/main/resources/cdm/v5/diff_V5_0_1.json new file mode 100644 index 00000000..dda9931a --- /dev/null +++ b/src/main/resources/cdm/v5/diff_V5_0_1.json @@ -0,0 +1,12 @@ +{ + "cost" : [ "cost_id", "cost_event_id", "cost_domain_id", "cost_type_concept_id", "currency_concept_id", "total_charge", "total_cost", "total_paid", "paid_by_payer", "paid_by_patient", "paid_patient_copay", "paid_patient_coinsurance", "paid_patient_deductible", "paid_by_primary", "paid_ingredient_cost", "paid_dispensing_fee", "payer_plan_period_id", "amount_allowed", "revenue_code_concept_id", "reveue_code_source_value" ], + "drug_exposure" : [ "effective_drug_dose", "dose_unit_concept_id" ], + "drug_strength" : [ "denominator_value" ], + "measurement" : [ "measurement_time" ], + "note" : [ "note_time" ], + "observation" : [ "observation_time" ], + "person" : [ "time_of_birth" ], + "procedure_occurrence" : [ "qualifier_source_value" ], + "specimen" : [ "specimen_time" ], + "visit_occurrence" : [ "visit_start_time", "visit_end_time" ] +} \ No newline at end of file diff --git a/src/main/resources/cdm/v5/diff_V5_1.json b/src/main/resources/cdm/v5/diff_V5_1.json new file mode 100644 index 00000000..157630b2 --- /dev/null +++ b/src/main/resources/cdm/v5/diff_V5_1.json @@ -0,0 +1,16 @@ +{ + "condition_occurrence" : [ "condition_start_datetime", "condition_end_datetime" ], + "cost" : [ "cost_id", "cost_event_id", "cost_domain_id", "cost_type_concept_id", "currency_concept_id", "total_charge", "total_cost", "total_paid", "paid_by_payer", "paid_by_patient", "paid_patient_copay", "paid_patient_coinsurance", "paid_patient_deductible", "paid_by_primary", 
"paid_ingredient_cost", "paid_dispensing_fee", "payer_plan_period_id", "amount_allowed", "revenue_code_concept_id", "reveue_code_source_value" ], + "death" : [ "death_datetime" ], + "device_exposure" : [ "device_exposure_start_datetime", "device_exposure_end_datetime" ], + "drug_exposure" : [ "drug_exposure_start_datetime", "drug_exposure_end_datetime", "effective_drug_dose", "dose_unit_concept_id" ], + "drug_strength" : [ "denominator_value", "box_size" ], + "measurement" : [ "measurement_datetime" ], + "note" : [ "note_datetime" ], + "observation" : [ "observation_datetime" ], + "observation_period" : [ "observation_period_start_datetime", "observation_period_end_datetime" ], + "person" : [ "birth_datetime" ], + "procedure_occurrence" : [ "procedure_datetime", "qualifier_source_value" ], + "specimen" : [ "specimen_datetime" ], + "visit_occurrence" : [ "visit_start_datetime", "visit_end_datetime" ] +} \ No newline at end of file diff --git a/src/main/resources/cdm/v5/diff_V5_2.json b/src/main/resources/cdm/v5/diff_V5_2.json new file mode 100644 index 00000000..ae829e8c --- /dev/null +++ b/src/main/resources/cdm/v5/diff_V5_2.json @@ -0,0 +1,17 @@ +{ + "condition_occurrence" : [ "condition_start_datetime", "condition_end_datetime", "condition_status_source_value", "condition_status_concept_id" ], + "cost" : [ "cost_id", "cost_event_id", "cost_domain_id", "cost_type_concept_id", "currency_concept_id", "total_charge", "total_cost", "total_paid", "paid_by_payer", "paid_by_patient", "paid_patient_copay", "paid_patient_coinsurance", "paid_patient_deductible", "paid_by_primary", "paid_ingredient_cost", "paid_dispensing_fee", "payer_plan_period_id", "amount_allowed", "revenue_code_concept_id", "reveue_code_source_value", "drg_concept_id", "drg_source_value" ], + "death" : [ "death_datetime" ], + "device_exposure" : [ "device_exposure_start_datetime", "device_exposure_end_datetime" ], + "drug_exposure" : [ "drug_exposure_start_datetime", "drug_exposure_end_datetime", 
"verbatim_end_date" ], + "drug_strength" : [ "denominator_value", "box_size" ], + "measurement" : [ "measurement_datetime" ], + "note" : [ "note_datetime", "note_class_concept_id", "note_title", "encoding_concept_id", "language_concept_id" ], + "note_nlp" : [ "note_nlp_id", "note_id", "section_concept_id", "snippet", "offset", "lexical_variant", "note_nlp_concept_id", "note_nlp_source_concept_id", "nlp_system", "nlp_date", "nlp_datetime", "term_exists", "term_temporal", "term_modifiers" ], + "observation" : [ "observation_datetime" ], + "observation_period" : [ "observation_period_start_datetime", "observation_period_end_datetime" ], + "person" : [ "birth_datetime" ], + "procedure_occurrence" : [ "procedure_datetime", "qualifier_source_value" ], + "specimen" : [ "specimen_datetime" ], + "visit_occurrence" : [ "visit_start_datetime", "visit_end_datetime", "admitting_source_concept_id", "admitting_source_value", "discharge_to_concept_id", "discharge_to_source_value", "preceding_visit_occurrence_id" ] +} \ No newline at end of file diff --git a/src/main/resources/cdm/v5/diff_V5_3.json b/src/main/resources/cdm/v5/diff_V5_3.json new file mode 100644 index 00000000..4f5c8da6 --- /dev/null +++ b/src/main/resources/cdm/v5/diff_V5_3.json @@ -0,0 +1,19 @@ +{ + "condition_occurrence" : [ "condition_start_datetime", "condition_end_datetime", "visit_detail_id", "condition_status_source_value", "condition_status_concept_id" ], + "cost" : [ "cost_id", "cost_event_id", "cost_domain_id", "cost_type_concept_id", "currency_concept_id", "total_charge", "total_cost", "total_paid", "paid_by_payer", "paid_by_patient", "paid_patient_copay", "paid_patient_coinsurance", "paid_patient_deductible", "paid_by_primary", "paid_ingredient_cost", "paid_dispensing_fee", "payer_plan_period_id", "amount_allowed", "revenue_code_concept_id", "reveue_code_source_value", "drg_concept_id", "drg_source_value" ], + "death" : [ "death_datetime" ], + "device_exposure" : [ "device_exposure_start_datetime", 
"device_exposure_end_datetime", "visit_detail_id" ], + "drug_exposure" : [ "drug_exposure_start_datetime", "drug_exposure_end_datetime", "verbatim_end_date", "visit_detail_id" ], + "drug_strength" : [ "denominator_value", "box_size" ], + "measurement" : [ "measurement_time", "measurement_datetime", "visit_detail_id" ], + "metadata" : [ "metadata_concept_id", "metadata_type_concept_id", "name", "value_as_string", "value_as_concept_id", "metadata_date", "metadata_datetime" ], + "note" : [ "note_datetime", "note_class_concept_id", "note_title", "encoding_concept_id", "language_concept_id", "visit_detail_id" ], + "note_nlp" : [ "note_nlp_id", "note_id", "section_concept_id", "snippet", "offset", "lexical_variant", "note_nlp_concept_id", "note_nlp_source_concept_id", "nlp_system", "nlp_date", "nlp_datetime", "term_exists", "term_temporal", "term_modifiers" ], + "observation" : [ "observation_datetime", "visit_detail_id" ], + "payer_plan_period" : [ "payer_concept_id", "payer_source_concept_id", "plan_concept_id", "plan_source_concept_id", "sponsor_concept_id", "sponsor_source_value", "sponsor_source_concept_id", "stop_reason_concept_id", "stop_reason_source_value", "stop_reason_source_concept_id" ], + "person" : [ "birth_datetime" ], + "procedure_occurrence" : [ "procedure_datetime", "visit_detail_id", "modifier_source_value" ], + "specimen" : [ "specimen_datetime" ], + "visit_detail" : [ "visit_detail_id", "person_id", "visit_detail_concept_id", "visit_start_date", "visit_start_datetime", "visit_end_date", "visit_end_datetime", "visit_type_concept_id", "provider_id", "care_site_id", "admitting_source_concept_id", "discharge_to_concept_id", "preceding_visit_detail_id", "visit_source_value", "visit_source_concept_id", "admitting_source_value", "discharge_to_source_value", "visit_detail_parent_id", "visit_occurrence_id" ], + "visit_occurrence" : [ "visit_start_datetime", "visit_end_datetime", "admitting_source_concept_id", "admitting_source_value", 
"discharge_to_concept_id", "discharge_to_source_value", "preceding_visit_occurrence_id" ] +} \ No newline at end of file diff --git a/src/main/resources/cdm/v5/diff_V5_3_1.json b/src/main/resources/cdm/v5/diff_V5_3_1.json new file mode 100644 index 00000000..cb42313c --- /dev/null +++ b/src/main/resources/cdm/v5/diff_V5_3_1.json @@ -0,0 +1,19 @@ +{ + "condition_occurrence" : [ "condition_start_datetime", "condition_end_datetime", "visit_detail_id", "condition_status_source_value", "condition_status_concept_id" ], + "cost" : [ "cost_id", "cost_event_id", "cost_domain_id", "cost_type_concept_id", "currency_concept_id", "total_charge", "total_cost", "total_paid", "paid_by_payer", "paid_by_patient", "paid_patient_copay", "paid_patient_coinsurance", "paid_patient_deductible", "paid_by_primary", "paid_ingredient_cost", "paid_dispensing_fee", "payer_plan_period_id", "amount_allowed", "revenue_code_concept_id", "reveue_code_source_value", "drg_concept_id", "drg_source_value" ], + "death" : [ "death_datetime" ], + "device_exposure" : [ "device_exposure_start_datetime", "device_exposure_end_datetime", "visit_detail_id" ], + "drug_exposure" : [ "drug_exposure_start_datetime", "drug_exposure_end_datetime", "verbatim_end_date", "visit_detail_id" ], + "drug_strength" : [ "denominator_value", "box_size" ], + "measurement" : [ "measurement_datetime", "measurement_time", "visit_detail_id" ], + "metadata" : [ "metadata_concept_id", "metadata_type_concept_id", "name", "value_as_string", "value_as_concept_id", "metadata_date", "metadata_datetime" ], + "note" : [ "note_datetime", "note_class_concept_id", "note_title", "encoding_concept_id", "language_concept_id", "visit_detail_id" ], + "note_nlp" : [ "note_nlp_id", "note_id", "section_concept_id", "snippet", "offset", "lexical_variant", "note_nlp_concept_id", "note_nlp_source_concept_id", "nlp_system", "nlp_date", "nlp_datetime", "term_exists", "term_temporal", "term_modifiers" ], + "observation" : [ "observation_datetime", 
"visit_detail_id" ], + "payer_plan_period" : [ "payer_concept_id", "payer_source_concept_id", "plan_concept_id", "plan_source_concept_id", "sponsor_concept_id", "sponsor_source_value", "sponsor_source_concept_id", "stop_reason_concept_id", "stop_reason_source_value", "stop_reason_source_concept_id" ], + "person" : [ "birth_datetime" ], + "procedure_occurrence" : [ "procedure_datetime", "visit_detail_id", "modifier_source_value" ], + "specimen" : [ "specimen_datetime" ], + "visit_detail" : [ "visit_detail_id", "person_id", "visit_detail_concept_id", "visit_detail_start_date", "visit_detail_start_datetime", "visit_detail_end_date", "visit_detail_end_datetime", "visit_detail_type_concept_id", "provider_id", "care_site_id", "admitting_source_concept_id", "discharge_to_concept_id", "preceding_visit_detail_id", "visit_detail_source_value", "visit_detail_source_concept_id", "admitting_source_value", "discharge_to_source_value", "visit_detail_parent_id", "visit_occurrence_id" ], + "visit_occurrence" : [ "visit_start_datetime", "visit_end_datetime", "admitting_source_concept_id", "admitting_source_value", "discharge_to_concept_id", "discharge_to_source_value", "preceding_visit_occurrence_id" ] +} \ No newline at end of file diff --git a/src/main/resources/cdm/v5/tableCheck_V5_0.sql b/src/main/resources/cdm/v5/tableCheck_V5_0.sql deleted file mode 100644 index 58dff545..00000000 --- a/src/main/resources/cdm/v5/tableCheck_V5_0.sql +++ /dev/null @@ -1,79 +0,0 @@ -SELECT TOP 1 attribute_definition_id, attribute_name, attribute_description, attribute_type_concept_id, attribute_syntax -FROM @cdm_schema.attribute_definition; -SELECT TOP 1 care_site_id, care_site_name, place_of_service_concept_id, location_id, care_site_source_value, place_of_service_source_value -FROM @cdm_schema.care_site; -SELECT TOP 1 cdm_source_name, cdm_source_abbreviation, cdm_holder, source_description, source_documentation_reference, cdm_etl_reference, source_release_date, cdm_release_date, cdm_version, 
vocabulary_version -FROM @cdm_schema.cdm_source; -SELECT TOP 1 cohort_definition_id, subject_id, cohort_start_date, cohort_end_date -FROM @cdm_schema.cohort; -SELECT TOP 1 cohort_definition_id, cohort_start_date, cohort_end_date, subject_id, attribute_definition_id, value_as_number, value_as_concept_id -FROM @cdm_schema.cohort_attribute; -SELECT TOP 1 cohort_definition_id, cohort_definition_name, cohort_definition_description, definition_type_concept_id, cohort_definition_syntax, subject_concept_id --- , cohort_initiation_date -- due to unclarity in https://github.com/OHDSI/CommonDataModel/issues/178 -FROM @cdm_schema.cohort_definition; -SELECT TOP 1 concept_id, concept_name, domain_id, vocabulary_id, concept_class_id, standard_concept, concept_code, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept; -SELECT TOP 1 ancestor_concept_id, descendant_concept_id, min_levels_of_separation, max_levels_of_separation -FROM @cdm_schema.concept_ancestor; -SELECT TOP 1 concept_class_id, concept_class_name, concept_class_concept_id -FROM @cdm_schema.concept_class; -SELECT TOP 1 concept_id_1, concept_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept_relationship; -SELECT TOP 1 concept_id, concept_synonym_name, language_concept_id -FROM @cdm_schema.concept_synonym; -SELECT TOP 1 condition_era_id, person_id, condition_concept_id, condition_era_start_date, condition_era_end_date, condition_occurrence_count -FROM @cdm_schema.condition_era; -SELECT TOP 1 condition_occurrence_id, person_id, condition_concept_id, condition_start_date, condition_end_date, condition_type_concept_id, stop_reason, provider_id, visit_occurrence_id, condition_source_value, condition_source_concept_id -FROM @cdm_schema.condition_occurrence; -SELECT TOP 1 person_id, death_date, death_type_concept_id, cause_concept_id, cause_source_value, cause_source_concept_id -FROM @cdm_schema.death; -SELECT TOP 1 device_cost_id, device_exposure_id, 
currency_concept_id, paid_copay, paid_coinsurance, paid_toward_deductible, paid_by_payer, paid_by_coordination_benefits, total_out_of_pocket, total_paid, payer_plan_period_id -FROM @cdm_schema.device_cost; -SELECT TOP 1 device_exposure_id, person_id, device_concept_id, device_exposure_start_date, device_exposure_end_date, device_type_concept_id, unique_device_id, quantity, provider_id, visit_occurrence_id, device_source_value, device_source_concept_id -FROM @cdm_schema.device_exposure; -SELECT TOP 1 domain_id, domain_name, domain_concept_id -FROM @cdm_schema."domain"; -SELECT TOP 1 dose_era_id, person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_start_date, dose_era_end_date -FROM @cdm_schema.dose_era; -SELECT TOP 1 drug_cost_id, drug_exposure_id, currency_concept_id, paid_copay, paid_coinsurance, paid_toward_deductible, paid_by_payer, paid_by_coordination_benefits, total_out_of_pocket, total_paid, ingredient_cost, dispensing_fee, average_wholesale_price, payer_plan_period_id -FROM @cdm_schema.drug_cost; -SELECT TOP 1 drug_era_id, person_id, drug_concept_id, drug_era_start_date, drug_era_end_date, drug_exposure_count, gap_days -FROM @cdm_schema.drug_era; -SELECT TOP 1 drug_exposure_id, person_id, drug_concept_id, drug_exposure_start_date, drug_exposure_end_date, drug_type_concept_id, stop_reason, refills, quantity, days_supply, sig, route_concept_id, effective_drug_dose, dose_unit_concept_id, lot_number, provider_id, visit_occurrence_id, drug_source_value, drug_source_concept_id, route_source_value, dose_unit_source_value -FROM @cdm_schema.drug_exposure; -SELECT TOP 1 drug_concept_id, ingredient_concept_id, amount_value, amount_unit_concept_id, numerator_value, numerator_unit_concept_id, denominator_unit_concept_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.drug_strength; -SELECT TOP 1 domain_concept_id_1, fact_id_1, domain_concept_id_2, fact_id_2, relationship_concept_id -FROM @cdm_schema.fact_relationship; -SELECT TOP 1 
location_id, address_1, address_2, city, state, zip, county, location_source_value -FROM @cdm_schema.location; -SELECT TOP 1 measurement_id, person_id, measurement_concept_id, measurement_date, measurement_time, measurement_type_concept_id, operator_concept_id, value_as_number, value_as_concept_id, unit_concept_id, range_low, range_high, provider_id, visit_occurrence_id, measurement_source_value, measurement_source_concept_id, unit_source_value, value_source_value -FROM @cdm_schema.measurement; -SELECT TOP 1 note_id, person_id, note_date, note_time, note_type_concept_id, note_text, provider_id, visit_occurrence_id, note_source_value -FROM @cdm_schema.note; -SELECT TOP 1 observation_id, person_id, observation_concept_id, observation_date, observation_time, observation_type_concept_id, value_as_number, value_as_string, value_as_concept_id, qualifier_concept_id, unit_concept_id, provider_id, visit_occurrence_id, observation_source_value, observation_source_concept_id, unit_source_value, qualifier_source_value -FROM @cdm_schema.observation; -SELECT TOP 1 observation_period_id, person_id, observation_period_start_date, observation_period_end_date, period_type_concept_id -FROM @cdm_schema.observation_period; -SELECT TOP 1 payer_plan_period_id, person_id, payer_plan_period_start_date, payer_plan_period_end_date, payer_source_value, plan_source_value, family_source_value -FROM @cdm_schema.payer_plan_period; -SELECT TOP 1 person_id, gender_concept_id, year_of_birth, month_of_birth, day_of_birth, time_of_birth, race_concept_id, ethnicity_concept_id, location_id, provider_id, care_site_id, person_source_value, gender_source_value, gender_source_concept_id, race_source_value, race_source_concept_id, ethnicity_source_value, ethnicity_source_concept_id -FROM @cdm_schema.person; -SELECT TOP 1 procedure_cost_id, procedure_occurrence_id, currency_concept_id, paid_copay, paid_coinsurance, paid_toward_deductible, paid_by_payer, paid_by_coordination_benefits, total_out_of_pocket, 
total_paid, revenue_code_concept_id, payer_plan_period_id, revenue_code_source_value -FROM @cdm_schema.procedure_cost; -SELECT TOP 1 procedure_occurrence_id, person_id, procedure_concept_id, procedure_date, procedure_type_concept_id, modifier_concept_id, quantity, provider_id, visit_occurrence_id, procedure_source_value, procedure_source_concept_id, qualifier_source_value -FROM @cdm_schema.procedure_occurrence; -SELECT TOP 1 provider_id, provider_name, npi, dea, specialty_concept_id, care_site_id, year_of_birth, gender_concept_id, provider_source_value, specialty_source_value, specialty_source_concept_id, gender_source_value, gender_source_concept_id -FROM @cdm_schema.provider; -SELECT TOP 1 relationship_id, relationship_name, is_hierarchical, defines_ancestry, reverse_relationship_id, relationship_concept_id -FROM @cdm_schema.relationship; -SELECT TOP 1 source_code, source_concept_id, source_vocabulary_id, source_code_description, target_concept_id, target_vocabulary_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.source_to_concept_map; -SELECT TOP 1 specimen_id, person_id, specimen_concept_id, specimen_type_concept_id, specimen_date, specimen_time, quantity, unit_concept_id, anatomic_site_concept_id, disease_status_concept_id, specimen_source_id, specimen_source_value, unit_source_value, anatomic_site_source_value, disease_status_source_value -FROM @cdm_schema.specimen; -SELECT TOP 1 visit_cost_id, visit_occurrence_id, currency_concept_id, paid_copay, paid_coinsurance, paid_toward_deductible, paid_by_payer, paid_by_coordination_benefits, total_out_of_pocket, total_paid, payer_plan_period_id -FROM @cdm_schema.visit_cost; -SELECT TOP 1 visit_occurrence_id, person_id, visit_concept_id, visit_start_date, visit_start_time, visit_end_date, visit_end_time, visit_type_concept_id, provider_id, care_site_id, visit_source_value, visit_source_concept_id -FROM @cdm_schema.visit_occurrence; -SELECT TOP 1 vocabulary_id, vocabulary_name, 
vocabulary_reference, vocabulary_version, vocabulary_concept_id -FROM @cdm_schema.vocabulary; \ No newline at end of file diff --git a/src/main/resources/cdm/v5/tableCheck_V5_0_1.sql b/src/main/resources/cdm/v5/tableCheck_V5_0_1.sql deleted file mode 100644 index 3f11836a..00000000 --- a/src/main/resources/cdm/v5/tableCheck_V5_0_1.sql +++ /dev/null @@ -1,74 +0,0 @@ -SELECT TOP 1 attribute_definition_id, attribute_name, attribute_description, attribute_type_concept_id, attribute_syntax -FROM @cdm_schema.attribute_definition; -SELECT TOP 1 care_site_id, care_site_name, place_of_service_concept_id, location_id, care_site_source_value, place_of_service_source_value -FROM @cdm_schema.care_site; -SELECT TOP 1 cdm_source_name, cdm_source_abbreviation, cdm_holder, source_description, source_documentation_reference, cdm_etl_reference, source_release_date, cdm_release_date, cdm_version, vocabulary_version -FROM @cdm_schema.cdm_source; -SELECT TOP 1 cohort_definition_id, subject_id, cohort_start_date, cohort_end_date -FROM @cdm_schema.cohort; -SELECT TOP 1 cohort_definition_id, cohort_start_date, cohort_end_date, subject_id, attribute_definition_id, value_as_number, value_as_concept_id -FROM @cdm_schema.cohort_attribute; -SELECT TOP 1 cohort_definition_id, cohort_definition_name, cohort_definition_description, definition_type_concept_id, cohort_definition_syntax, subject_concept_id --- , cohort_initiation_date -- due to unclarity in https://github.com/OHDSI/CommonDataModel/issues/178 -FROM @cdm_schema.cohort_definition; -SELECT TOP 1 concept_id, concept_name, domain_id, vocabulary_id, concept_class_id, standard_concept, concept_code, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept; -SELECT TOP 1 ancestor_concept_id, descendant_concept_id, min_levels_of_separation, max_levels_of_separation -FROM @cdm_schema.concept_ancestor; -SELECT TOP 1 concept_class_id, concept_class_name, concept_class_concept_id -FROM @cdm_schema.concept_class; -SELECT TOP 1 
concept_id_1, concept_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept_relationship; -SELECT TOP 1 concept_id, concept_synonym_name, language_concept_id -FROM @cdm_schema.concept_synonym; -SELECT TOP 1 condition_era_id, person_id, condition_concept_id, condition_era_start_date, condition_era_end_date, condition_occurrence_count -FROM @cdm_schema.condition_era; -SELECT TOP 1 condition_occurrence_id, person_id, condition_concept_id, condition_start_date, condition_end_date, condition_type_concept_id, stop_reason, provider_id, visit_occurrence_id, condition_source_value, condition_source_concept_id -FROM @cdm_schema.condition_occurrence; -SELECT TOP 1 cost_id, cost_event_id, cost_domain_id, cost_type_concept_id, currency_concept_id, total_charge, total_cost, total_paid, paid_by_payer, paid_by_patient, paid_patient_copay, paid_patient_coinsurance, paid_patient_deductible, paid_by_primary, paid_ingredient_cost, paid_dispensing_fee, payer_plan_period_id, amount_allowed, revenue_code_concept_id --- , reveue_code_source_value -FROM @cdm_schema.cost; -SELECT TOP 1 person_id, death_date, death_type_concept_id, cause_concept_id, cause_source_value, cause_source_concept_id -FROM @cdm_schema.death; -SELECT TOP 1 device_exposure_id, person_id, device_concept_id, device_exposure_start_date, device_exposure_end_date, device_type_concept_id, unique_device_id, quantity, provider_id, visit_occurrence_id, device_source_value, device_source_concept_id -FROM @cdm_schema.device_exposure; -SELECT TOP 1 domain_id, domain_name, domain_concept_id -FROM @cdm_schema."domain"; -SELECT TOP 1 dose_era_id, person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_start_date, dose_era_end_date -FROM @cdm_schema.dose_era; -SELECT TOP 1 drug_era_id, person_id, drug_concept_id, drug_era_start_date, drug_era_end_date, drug_exposure_count, gap_days -FROM @cdm_schema.drug_era; -SELECT TOP 1 drug_exposure_id, person_id, drug_concept_id, 
drug_exposure_start_date, drug_exposure_end_date, drug_type_concept_id, stop_reason, refills, quantity, days_supply, sig, route_concept_id, effective_drug_dose, dose_unit_concept_id, lot_number, provider_id, visit_occurrence_id, drug_source_value, drug_source_concept_id, route_source_value, dose_unit_source_value -FROM @cdm_schema.drug_exposure; -SELECT TOP 1 drug_concept_id, ingredient_concept_id, amount_value, amount_unit_concept_id, numerator_value, numerator_unit_concept_id, denominator_value, denominator_unit_concept_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.drug_strength; -SELECT TOP 1 domain_concept_id_1, fact_id_1, domain_concept_id_2, fact_id_2, relationship_concept_id -FROM @cdm_schema.fact_relationship; -SELECT TOP 1 location_id, address_1, address_2, city, state, zip, county, location_source_value -FROM @cdm_schema.location; -SELECT TOP 1 measurement_id, person_id, measurement_concept_id, measurement_date, measurement_time, measurement_type_concept_id, operator_concept_id, value_as_number, value_as_concept_id, unit_concept_id, range_low, range_high, provider_id, visit_occurrence_id, measurement_source_value, measurement_source_concept_id, unit_source_value, value_source_value -FROM @cdm_schema.measurement; -SELECT TOP 1 note_id, person_id, note_date, note_time, note_type_concept_id, note_text, provider_id, visit_occurrence_id, note_source_value -FROM @cdm_schema.note; -SELECT TOP 1 observation_id, person_id, observation_concept_id, observation_date, observation_time, observation_type_concept_id, value_as_number, value_as_string, value_as_concept_id, qualifier_concept_id, unit_concept_id, provider_id, visit_occurrence_id, observation_source_value, observation_source_concept_id, unit_source_value, qualifier_source_value -FROM @cdm_schema.observation; -SELECT TOP 1 observation_period_id, person_id, observation_period_start_date, observation_period_end_date, period_type_concept_id -FROM @cdm_schema.observation_period; -SELECT 
TOP 1 payer_plan_period_id, person_id, payer_plan_period_start_date, payer_plan_period_end_date, payer_source_value, plan_source_value, family_source_value -FROM @cdm_schema.payer_plan_period; -SELECT TOP 1 person_id, gender_concept_id, year_of_birth, month_of_birth, day_of_birth, time_of_birth, race_concept_id, ethnicity_concept_id, location_id, provider_id, care_site_id, person_source_value, gender_source_value, gender_source_concept_id, race_source_value, race_source_concept_id, ethnicity_source_value, ethnicity_source_concept_id -FROM @cdm_schema.person; -SELECT TOP 1 procedure_occurrence_id, person_id, procedure_concept_id, procedure_date, procedure_type_concept_id, modifier_concept_id, quantity, provider_id, visit_occurrence_id, procedure_source_value, procedure_source_concept_id, qualifier_source_value -FROM @cdm_schema.procedure_occurrence; -SELECT TOP 1 provider_id, provider_name, npi, dea, specialty_concept_id, care_site_id, year_of_birth, gender_concept_id, provider_source_value, specialty_source_value, specialty_source_concept_id, gender_source_value, gender_source_concept_id -FROM @cdm_schema.provider; -SELECT TOP 1 relationship_id, relationship_name, is_hierarchical, defines_ancestry, reverse_relationship_id, relationship_concept_id -FROM @cdm_schema.relationship; -SELECT TOP 1 source_code, source_concept_id, source_vocabulary_id, source_code_description, target_concept_id, target_vocabulary_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.source_to_concept_map; -SELECT TOP 1 specimen_id, person_id, specimen_concept_id, specimen_type_concept_id, specimen_date, specimen_time, quantity, unit_concept_id, anatomic_site_concept_id, disease_status_concept_id, specimen_source_id, specimen_source_value, unit_source_value, anatomic_site_source_value, disease_status_source_value -FROM @cdm_schema.specimen; -SELECT TOP 1 visit_occurrence_id, person_id, visit_concept_id, visit_start_date, visit_start_time, visit_end_date, visit_end_time, 
visit_type_concept_id, provider_id, care_site_id, visit_source_value, visit_source_concept_id -FROM @cdm_schema.visit_occurrence; -SELECT TOP 1 vocabulary_id, vocabulary_name, vocabulary_reference, vocabulary_version, vocabulary_concept_id -FROM @cdm_schema.vocabulary; diff --git a/src/main/resources/cdm/v5/tableCheck_V5_1.sql b/src/main/resources/cdm/v5/tableCheck_V5_1.sql deleted file mode 100644 index 6c821908..00000000 --- a/src/main/resources/cdm/v5/tableCheck_V5_1.sql +++ /dev/null @@ -1,74 +0,0 @@ -SELECT TOP 1 attribute_definition_id, attribute_name, attribute_description, attribute_type_concept_id, attribute_syntax -FROM @cdm_schema.attribute_definition; -SELECT TOP 1 care_site_id, care_site_name, place_of_service_concept_id, location_id, care_site_source_value, place_of_service_source_value -FROM @cdm_schema.care_site; -SELECT TOP 1 cdm_source_name, cdm_source_abbreviation, cdm_holder, source_description, source_documentation_reference, cdm_etl_reference, source_release_date, cdm_release_date, cdm_version, vocabulary_version -FROM @cdm_schema.cdm_source; -SELECT TOP 1 cohort_definition_id, subject_id, cohort_start_date, cohort_end_date -FROM @cdm_schema.cohort; -SELECT TOP 1 cohort_definition_id, cohort_start_date, cohort_end_date, subject_id, attribute_definition_id, value_as_number, value_as_concept_id -FROM @cdm_schema.cohort_attribute; -SELECT TOP 1 cohort_definition_id, cohort_definition_name, cohort_definition_description, definition_type_concept_id, cohort_definition_syntax, subject_concept_id --- , cohort_initiation_date -- due to unclarity in https://github.com/OHDSI/CommonDataModel/issues/178 -FROM @cdm_schema.cohort_definition; -SELECT TOP 1 concept_id, concept_name, domain_id, vocabulary_id, concept_class_id, standard_concept, concept_code, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept; -SELECT TOP 1 ancestor_concept_id, descendant_concept_id, min_levels_of_separation, max_levels_of_separation -FROM 
@cdm_schema.concept_ancestor; -SELECT TOP 1 concept_class_id, concept_class_name, concept_class_concept_id -FROM @cdm_schema.concept_class; -SELECT TOP 1 concept_id_1, concept_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept_relationship; -SELECT TOP 1 concept_id, concept_synonym_name, language_concept_id -FROM @cdm_schema.concept_synonym; -SELECT TOP 1 condition_era_id, person_id, condition_concept_id, condition_era_start_date, condition_era_end_date, condition_occurrence_count -FROM @cdm_schema.condition_era; -SELECT TOP 1 condition_occurrence_id, person_id, condition_concept_id, condition_start_date, condition_start_datetime, condition_end_date, condition_end_datetime, condition_type_concept_id, stop_reason, provider_id, visit_occurrence_id, condition_source_value, condition_source_concept_id -FROM @cdm_schema.condition_occurrence; -SELECT TOP 1 cost_id, cost_event_id, cost_domain_id, cost_type_concept_id, currency_concept_id, total_charge, total_cost, total_paid, paid_by_payer, paid_by_patient, paid_patient_copay, paid_patient_coinsurance, paid_patient_deductible, paid_by_primary, paid_ingredient_cost, paid_dispensing_fee, payer_plan_period_id, amount_allowed, revenue_code_concept_id ---, reveue_code_source_value -FROM @cdm_schema.cost; -SELECT TOP 1 person_id, death_date, death_datetime, death_type_concept_id, cause_concept_id, cause_source_value, cause_source_concept_id -FROM @cdm_schema.death; -SELECT TOP 1 device_exposure_id, person_id, device_concept_id, device_exposure_start_date, device_exposure_start_datetime, device_exposure_end_date, device_exposure_end_datetime, device_type_concept_id, unique_device_id, quantity, provider_id, visit_occurrence_id, device_source_value, device_source_concept_id -FROM @cdm_schema.device_exposure; -SELECT TOP 1 domain_id, domain_name, domain_concept_id -FROM @cdm_schema."domain"; -SELECT TOP 1 dose_era_id, person_id, drug_concept_id, unit_concept_id, dose_value, 
dose_era_start_date, dose_era_end_date -FROM @cdm_schema.dose_era; -SELECT TOP 1 drug_era_id, person_id, drug_concept_id, drug_era_start_date, drug_era_end_date, drug_exposure_count, gap_days -FROM @cdm_schema.drug_era; -SELECT TOP 1 drug_exposure_id, person_id, drug_concept_id, drug_exposure_start_date, drug_exposure_start_datetime, drug_exposure_end_date, drug_exposure_end_datetime, drug_type_concept_id, stop_reason, refills, quantity, days_supply, sig, route_concept_id, effective_drug_dose, dose_unit_concept_id, lot_number, provider_id, visit_occurrence_id, drug_source_value, drug_source_concept_id, route_source_value, dose_unit_source_value -FROM @cdm_schema.drug_exposure; -SELECT TOP 1 drug_concept_id, ingredient_concept_id, amount_value, amount_unit_concept_id, numerator_value, numerator_unit_concept_id, denominator_value, denominator_unit_concept_id, box_size, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.drug_strength; -SELECT TOP 1 domain_concept_id_1, fact_id_1, domain_concept_id_2, fact_id_2, relationship_concept_id -FROM @cdm_schema.fact_relationship; -SELECT TOP 1 location_id, address_1, address_2, city, state, zip, county, location_source_value -FROM @cdm_schema.location; -SELECT TOP 1 measurement_id, person_id, measurement_concept_id, measurement_date, measurement_datetime, measurement_type_concept_id, operator_concept_id, value_as_number, value_as_concept_id, unit_concept_id, range_low, range_high, provider_id, visit_occurrence_id, measurement_source_value, measurement_source_concept_id, unit_source_value, value_source_value -FROM @cdm_schema.measurement; -SELECT TOP 1 note_id, person_id, note_date, note_datetime, note_type_concept_id, note_text, provider_id, visit_occurrence_id, note_source_value -FROM @cdm_schema.note; -SELECT TOP 1 observation_id, person_id, observation_concept_id, observation_date, observation_datetime, observation_type_concept_id, value_as_number, value_as_string, value_as_concept_id, qualifier_concept_id, 
unit_concept_id, provider_id, visit_occurrence_id, observation_source_value, observation_source_concept_id, unit_source_value, qualifier_source_value -FROM @cdm_schema.observation; -SELECT TOP 1 observation_period_id, person_id, observation_period_start_date, observation_period_start_datetime, observation_period_end_date, observation_period_end_datetime, period_type_concept_id -FROM @cdm_schema.observation_period; -SELECT TOP 1 payer_plan_period_id, person_id, payer_plan_period_start_date, payer_plan_period_end_date, payer_source_value, plan_source_value, family_source_value -FROM @cdm_schema.payer_plan_period; -SELECT TOP 1 person_id, gender_concept_id, year_of_birth, month_of_birth, day_of_birth, birth_datetime, race_concept_id, ethnicity_concept_id, location_id, provider_id, care_site_id, person_source_value, gender_source_value, gender_source_concept_id, race_source_value, race_source_concept_id, ethnicity_source_value, ethnicity_source_concept_id -FROM @cdm_schema.person; -SELECT TOP 1 procedure_occurrence_id, person_id, procedure_concept_id, procedure_date, procedure_datetime, procedure_type_concept_id, modifier_concept_id, quantity, provider_id, visit_occurrence_id, procedure_source_value, procedure_source_concept_id, qualifier_source_value -FROM @cdm_schema.procedure_occurrence; -SELECT TOP 1 provider_id, provider_name, npi, dea, specialty_concept_id, care_site_id, year_of_birth, gender_concept_id, provider_source_value, specialty_source_value, specialty_source_concept_id, gender_source_value, gender_source_concept_id -FROM @cdm_schema.provider; -SELECT TOP 1 relationship_id, relationship_name, is_hierarchical, defines_ancestry, reverse_relationship_id, relationship_concept_id -FROM @cdm_schema.relationship; -SELECT TOP 1 source_code, source_concept_id, source_vocabulary_id, source_code_description, target_concept_id, target_vocabulary_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.source_to_concept_map; -SELECT TOP 1 specimen_id, 
person_id, specimen_concept_id, specimen_type_concept_id, specimen_date, specimen_datetime, quantity, unit_concept_id, anatomic_site_concept_id, disease_status_concept_id, specimen_source_id, specimen_source_value, unit_source_value, anatomic_site_source_value, disease_status_source_value -FROM @cdm_schema.specimen; -SELECT TOP 1 visit_occurrence_id, person_id, visit_concept_id, visit_start_date, visit_start_datetime, visit_end_date, visit_end_datetime, visit_type_concept_id, provider_id, care_site_id, visit_source_value, visit_source_concept_id -FROM @cdm_schema.visit_occurrence; -SELECT TOP 1 vocabulary_id, vocabulary_name, vocabulary_reference, vocabulary_version, vocabulary_concept_id -FROM @cdm_schema.vocabulary; diff --git a/src/main/resources/cdm/v5/tableCheck_V5_2.sql b/src/main/resources/cdm/v5/tableCheck_V5_2.sql deleted file mode 100644 index 73c77d9f..00000000 --- a/src/main/resources/cdm/v5/tableCheck_V5_2.sql +++ /dev/null @@ -1,76 +0,0 @@ -SELECT TOP 1 attribute_definition_id, attribute_name, attribute_description, attribute_type_concept_id, attribute_syntax -FROM @cdm_schema.attribute_definition; -SELECT TOP 1 care_site_id, care_site_name, place_of_service_concept_id, location_id, care_site_source_value, place_of_service_source_value -FROM @cdm_schema.care_site; -SELECT TOP 1 cdm_source_name, cdm_source_abbreviation, cdm_holder, source_description, source_documentation_reference, cdm_etl_reference, source_release_date, cdm_release_date, cdm_version, vocabulary_version -FROM @cdm_schema.cdm_source; -SELECT TOP 1 cohort_definition_id, subject_id, cohort_start_date, cohort_end_date -FROM @cdm_schema.cohort; -SELECT TOP 1 cohort_definition_id, cohort_start_date, cohort_end_date, subject_id, attribute_definition_id, value_as_number, value_as_concept_id -FROM @cdm_schema.cohort_attribute; -SELECT TOP 1 cohort_definition_id, cohort_definition_name, cohort_definition_description, definition_type_concept_id, cohort_definition_syntax, subject_concept_id --- , 
cohort_initiation_date -- due to unclarity in https://github.com/OHDSI/CommonDataModel/issues/178 -FROM @cdm_schema.cohort_definition; -SELECT TOP 1 concept_id, concept_name, domain_id, vocabulary_id, concept_class_id, standard_concept, concept_code, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept; -SELECT TOP 1 ancestor_concept_id, descendant_concept_id, min_levels_of_separation, max_levels_of_separation -FROM @cdm_schema.concept_ancestor; -SELECT TOP 1 concept_class_id, concept_class_name, concept_class_concept_id -FROM @cdm_schema.concept_class; -SELECT TOP 1 concept_id_1, concept_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept_relationship; -SELECT TOP 1 concept_id, concept_synonym_name, language_concept_id -FROM @cdm_schema.concept_synonym; -SELECT TOP 1 condition_era_id, person_id, condition_concept_id, condition_era_start_date, condition_era_end_date, condition_occurrence_count -FROM @cdm_schema.condition_era; -SELECT TOP 1 condition_occurrence_id, person_id, condition_concept_id, condition_start_date, condition_start_datetime, condition_end_date, condition_end_datetime, condition_type_concept_id, stop_reason, provider_id, visit_occurrence_id, condition_source_value, condition_source_concept_id, condition_status_source_value, condition_status_concept_id -FROM @cdm_schema.condition_occurrence; -SELECT TOP 1 cost_id, cost_event_id, cost_domain_id, cost_type_concept_id, currency_concept_id, total_charge, total_cost, total_paid, paid_by_payer, paid_by_patient, paid_patient_copay, paid_patient_coinsurance, paid_patient_deductible, paid_by_primary, paid_ingredient_cost, paid_dispensing_fee, payer_plan_period_id, amount_allowed, revenue_code_concept_id, drg_concept_id, drg_source_value ---, reveue_code_source_value -FROM @cdm_schema.cost; -SELECT TOP 1 person_id, death_date, death_datetime, death_type_concept_id, cause_concept_id, cause_source_value, cause_source_concept_id -FROM 
@cdm_schema.death; -SELECT TOP 1 device_exposure_id, person_id, device_concept_id, device_exposure_start_date, device_exposure_start_datetime, device_exposure_end_date, device_exposure_end_datetime, device_type_concept_id, unique_device_id, quantity, provider_id, visit_occurrence_id, device_source_value, device_source_concept_id -FROM @cdm_schema.device_exposure; -SELECT TOP 1 domain_id, domain_name, domain_concept_id -FROM @cdm_schema."domain"; -SELECT TOP 1 dose_era_id, person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_start_date, dose_era_end_date -FROM @cdm_schema.dose_era; -SELECT TOP 1 drug_era_id, person_id, drug_concept_id, drug_era_start_date, drug_era_end_date, drug_exposure_count, gap_days -FROM @cdm_schema.drug_era; -SELECT TOP 1 drug_exposure_id, person_id, drug_concept_id, drug_exposure_start_date, drug_exposure_start_datetime, drug_exposure_end_date, drug_exposure_end_datetime, verbatim_end_date, drug_type_concept_id, stop_reason, refills, quantity, days_supply, sig, route_concept_id, lot_number, provider_id, visit_occurrence_id, drug_source_value, drug_source_concept_id, route_source_value, dose_unit_source_value -FROM @cdm_schema.drug_exposure; -SELECT TOP 1 drug_concept_id, ingredient_concept_id, amount_value, amount_unit_concept_id, numerator_value, numerator_unit_concept_id, denominator_value, denominator_unit_concept_id, box_size, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.drug_strength; -SELECT TOP 1 domain_concept_id_1, fact_id_1, domain_concept_id_2, fact_id_2, relationship_concept_id -FROM @cdm_schema.fact_relationship; -SELECT TOP 1 location_id, address_1, address_2, city, state, zip, county, location_source_value -FROM @cdm_schema.location; -SELECT TOP 1 measurement_id, person_id, measurement_concept_id, measurement_date, measurement_datetime, measurement_type_concept_id, operator_concept_id, value_as_number, value_as_concept_id, unit_concept_id, range_low, range_high, provider_id, 
visit_occurrence_id, measurement_source_value, measurement_source_concept_id, unit_source_value, value_source_value -FROM @cdm_schema.measurement; -SELECT TOP 1 note_id, person_id, note_date, note_datetime, note_type_concept_id, note_class_concept_id, note_title, note_text, encoding_concept_id, language_concept_id, provider_id, visit_occurrence_id, note_source_value -FROM @cdm_schema.note; -SELECT TOP 1 note_nlp_id, note_id, section_concept_id, snippet, "offset", lexical_variant, note_nlp_concept_id, note_nlp_source_concept_id, nlp_system, nlp_date, nlp_datetime, term_exists, term_temporal, term_modifiers -FROM @cdm_schema.note_nlp; -SELECT TOP 1 observation_id, person_id, observation_concept_id, observation_date, observation_datetime, observation_type_concept_id, value_as_number, value_as_string, value_as_concept_id, qualifier_concept_id, unit_concept_id, provider_id, visit_occurrence_id, observation_source_value, observation_source_concept_id, unit_source_value, qualifier_source_value -FROM @cdm_schema.observation; -SELECT TOP 1 observation_period_id, person_id, observation_period_start_date, observation_period_end_date, period_type_concept_id -FROM @cdm_schema.observation_period; -SELECT TOP 1 payer_plan_period_id, person_id, payer_plan_period_start_date, payer_plan_period_end_date, payer_source_value, plan_source_value, family_source_value -FROM @cdm_schema.payer_plan_period; -SELECT TOP 1 person_id, gender_concept_id, year_of_birth, month_of_birth, day_of_birth, birth_datetime, race_concept_id, ethnicity_concept_id, location_id, provider_id, care_site_id, person_source_value, gender_source_value, gender_source_concept_id, race_source_value, race_source_concept_id, ethnicity_source_value, ethnicity_source_concept_id -FROM @cdm_schema.person; -SELECT TOP 1 procedure_occurrence_id, person_id, procedure_concept_id, procedure_date, procedure_datetime, procedure_type_concept_id, modifier_concept_id, quantity, provider_id, visit_occurrence_id, procedure_source_value, 
procedure_source_concept_id, qualifier_source_value -FROM @cdm_schema.procedure_occurrence; -SELECT TOP 1 provider_id, provider_name, npi, dea, specialty_concept_id, care_site_id, year_of_birth, gender_concept_id, provider_source_value, specialty_source_value, specialty_source_concept_id, gender_source_value, gender_source_concept_id -FROM @cdm_schema.provider; -SELECT TOP 1 relationship_id, relationship_name, is_hierarchical, defines_ancestry, reverse_relationship_id, relationship_concept_id -FROM @cdm_schema.relationship; -SELECT TOP 1 source_code, source_concept_id, source_vocabulary_id, source_code_description, target_concept_id, target_vocabulary_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.source_to_concept_map; -SELECT TOP 1 specimen_id, person_id, specimen_concept_id, specimen_type_concept_id, specimen_date, specimen_datetime, quantity, unit_concept_id, anatomic_site_concept_id, disease_status_concept_id, specimen_source_id, specimen_source_value, unit_source_value, anatomic_site_source_value, disease_status_source_value -FROM @cdm_schema.specimen; -SELECT TOP 1 visit_occurrence_id, person_id, visit_concept_id, visit_start_date, visit_start_datetime, visit_end_date, visit_end_datetime, visit_type_concept_id, provider_id, care_site_id, visit_source_value, visit_source_concept_id, admitting_source_concept_id, admitting_source_value, discharge_to_concept_id, discharge_to_source_value, preceding_visit_occurrence_id -FROM @cdm_schema.visit_occurrence; -SELECT TOP 1 vocabulary_id, vocabulary_name, vocabulary_reference, vocabulary_version, vocabulary_concept_id -FROM @cdm_schema.vocabulary; diff --git a/src/main/resources/cdm/v5/tableCheck_V5_3.sql b/src/main/resources/cdm/v5/tableCheck_V5_3.sql deleted file mode 100644 index d70f9efb..00000000 --- a/src/main/resources/cdm/v5/tableCheck_V5_3.sql +++ /dev/null @@ -1,80 +0,0 @@ -SELECT TOP 1 attribute_definition_id, attribute_name, attribute_description, attribute_type_concept_id, 
attribute_syntax -FROM @cdm_schema.attribute_definition; -SELECT TOP 1 care_site_id, care_site_name, place_of_service_concept_id, location_id, care_site_source_value, place_of_service_source_value -FROM @cdm_schema.care_site; -SELECT TOP 1 cdm_source_name, cdm_source_abbreviation, cdm_holder, source_description, source_documentation_reference, cdm_etl_reference, source_release_date, cdm_release_date, cdm_version, vocabulary_version -FROM @cdm_schema.cdm_source; -SELECT TOP 1 cohort_definition_id, subject_id, cohort_start_date, cohort_end_date -FROM @cdm_schema.cohort; -SELECT TOP 1 cohort_definition_id, subject_id, cohort_start_date, cohort_end_date, attribute_definition_id, value_as_number, value_as_concept_id -FROM @cdm_schema.cohort_attribute; -SELECT TOP 1 cohort_definition_id, cohort_definition_name, cohort_definition_description, definition_type_concept_id, cohort_definition_syntax, subject_concept_id --- , cohort_initiation_date -- due to unclarity in https://github.com/OHDSI/CommonDataModel/issues/178 -FROM @cdm_schema.cohort_definition; -SELECT TOP 1 concept_id, concept_name, domain_id, vocabulary_id, concept_class_id, standard_concept, concept_code, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept; -SELECT TOP 1 ancestor_concept_id, descendant_concept_id, min_levels_of_separation, max_levels_of_separation -FROM @cdm_schema.concept_ancestor; -SELECT TOP 1 concept_class_id, concept_class_name, concept_class_concept_id -FROM @cdm_schema.concept_class; -SELECT TOP 1 concept_id_1, concept_id_2, relationship_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.concept_relationship; -SELECT TOP 1 concept_id, concept_synonym_name, language_concept_id -FROM @cdm_schema.concept_synonym; -SELECT TOP 1 condition_era_id, person_id, condition_concept_id, condition_era_start_date, condition_era_end_date, condition_occurrence_count -FROM @cdm_schema.condition_era; -SELECT TOP 1 condition_occurrence_id, person_id, 
condition_concept_id, condition_start_date, condition_start_datetime, condition_end_date, condition_end_datetime, condition_type_concept_id, stop_reason, provider_id, visit_occurrence_id, visit_detail_id, condition_source_value, condition_source_concept_id, condition_status_source_value, condition_status_concept_id -FROM @cdm_schema.condition_occurrence; -SELECT TOP 1 cost_id, cost_event_id, cost_domain_id, cost_type_concept_id, currency_concept_id, total_charge, total_cost, total_paid, paid_by_payer, paid_by_patient, paid_patient_copay, paid_patient_coinsurance, paid_patient_deductible, paid_by_primary, paid_ingredient_cost, paid_dispensing_fee, payer_plan_period_id, amount_allowed, revenue_code_concept_id, drg_concept_id, drg_source_value --- ,reveue_code_source_value -FROM @cdm_schema.cost; -SELECT TOP 1 person_id, death_date, death_datetime, death_type_concept_id, cause_concept_id, cause_source_value, cause_source_concept_id -FROM @cdm_schema.death; -SELECT TOP 1 device_exposure_id, person_id, device_concept_id, device_exposure_start_date, device_exposure_start_datetime, device_exposure_end_date, device_exposure_end_datetime, device_type_concept_id, unique_device_id, quantity, provider_id, visit_occurrence_id, visit_detail_id, device_source_value, device_source_concept_id -FROM @cdm_schema.device_exposure; -SELECT TOP 1 domain_id, domain_name, domain_concept_id -FROM @cdm_schema."domain"; -SELECT TOP 1 dose_era_id, person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_start_date, dose_era_end_date -FROM @cdm_schema.dose_era; -SELECT TOP 1 drug_era_id, person_id, drug_concept_id, drug_era_start_date, drug_era_end_date, drug_exposure_count, gap_days -FROM @cdm_schema.drug_era; -SELECT TOP 1 drug_exposure_id, person_id, drug_concept_id, drug_exposure_start_date, drug_exposure_start_datetime, drug_exposure_end_date, drug_exposure_end_datetime, verbatim_end_date, drug_type_concept_id, stop_reason, refills, quantity, days_supply, sig, route_concept_id, 
lot_number, provider_id, visit_occurrence_id, visit_detail_id, drug_source_value, drug_source_concept_id, route_source_value, dose_unit_source_value -FROM @cdm_schema.drug_exposure; -SELECT TOP 1 drug_concept_id, ingredient_concept_id, amount_value, amount_unit_concept_id, numerator_value, numerator_unit_concept_id, denominator_value, denominator_unit_concept_id, box_size, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.drug_strength; -SELECT TOP 1 domain_concept_id_1, fact_id_1, domain_concept_id_2, fact_id_2, relationship_concept_id -FROM @cdm_schema.fact_relationship; -SELECT TOP 1 location_id, address_1, address_2, city, state, zip, county, location_source_value -FROM @cdm_schema.location; -SELECT TOP 1 measurement_id, person_id, measurement_concept_id, measurement_date, measurement_time, measurement_datetime, measurement_type_concept_id, operator_concept_id, value_as_number, value_as_concept_id, unit_concept_id, range_low, range_high, provider_id, visit_occurrence_id, visit_detail_id, measurement_source_value, measurement_source_concept_id, unit_source_value, value_source_value -FROM @cdm_schema.measurement; -SELECT TOP 1 metadata_concept_id, metadata_type_concept_id, "name", value_as_string, value_as_concept_id, metadata_date, metadata_datetime -FROM @cdm_schema.metadata; -SELECT TOP 1 note_id, person_id, note_date, note_datetime, note_type_concept_id, note_class_concept_id, note_title, note_text, encoding_concept_id, language_concept_id, provider_id, visit_occurrence_id, visit_detail_id, note_source_value -FROM @cdm_schema.note; -SELECT TOP 1 note_nlp_id, note_id, section_concept_id, snippet, "offset", lexical_variant, note_nlp_concept_id, note_nlp_source_concept_id, nlp_system, nlp_date, nlp_datetime, term_exists, term_temporal, term_modifiers -FROM @cdm_schema.note_nlp; -SELECT TOP 1 observation_id, person_id, observation_concept_id, observation_date, observation_datetime, observation_type_concept_id, value_as_number, value_as_string, 
value_as_concept_id, qualifier_concept_id, unit_concept_id, provider_id, visit_occurrence_id, visit_detail_id, observation_source_value, observation_source_concept_id, unit_source_value, qualifier_source_value -FROM @cdm_schema.observation; -SELECT TOP 1 observation_period_id, person_id, observation_period_start_date, observation_period_end_date, period_type_concept_id -FROM @cdm_schema.observation_period; -SELECT TOP 1 payer_plan_period_id, person_id, payer_plan_period_start_date, payer_plan_period_end_date, payer_concept_id, payer_source_value, payer_source_concept_id, plan_concept_id, plan_source_value, plan_source_concept_id, sponsor_concept_id, sponsor_source_value, sponsor_source_concept_id, family_source_value, stop_reason_concept_id, stop_reason_source_value, stop_reason_source_concept_id -FROM @cdm_schema.payer_plan_period; -SELECT TOP 1 person_id, gender_concept_id, year_of_birth, month_of_birth, day_of_birth, birth_datetime, race_concept_id, ethnicity_concept_id, location_id, provider_id, care_site_id, person_source_value, gender_source_value, gender_source_concept_id, race_source_value, race_source_concept_id, ethnicity_source_value, ethnicity_source_concept_id -FROM @cdm_schema.person; -SELECT TOP 1 procedure_occurrence_id, person_id, procedure_concept_id, procedure_date, procedure_datetime, procedure_type_concept_id, modifier_concept_id, quantity, provider_id, visit_occurrence_id, visit_detail_id, procedure_source_value, procedure_source_concept_id, modifier_source_value -FROM @cdm_schema.procedure_occurrence; -SELECT TOP 1 provider_id, provider_name, npi, dea, specialty_concept_id, care_site_id, year_of_birth, gender_concept_id, provider_source_value, specialty_source_value, specialty_source_concept_id, gender_source_value, gender_source_concept_id -FROM @cdm_schema.provider; -SELECT TOP 1 relationship_id, relationship_name, is_hierarchical, defines_ancestry, reverse_relationship_id, relationship_concept_id -FROM @cdm_schema.relationship; -SELECT TOP 
1 source_code, source_concept_id, source_vocabulary_id, source_code_description, target_concept_id, target_vocabulary_id, valid_start_date, valid_end_date, invalid_reason -FROM @cdm_schema.source_to_concept_map; -SELECT TOP 1 specimen_id, person_id, specimen_concept_id, specimen_type_concept_id, specimen_date, specimen_datetime, quantity, unit_concept_id, anatomic_site_concept_id, disease_status_concept_id, specimen_source_id, specimen_source_value, unit_source_value, anatomic_site_source_value, disease_status_source_value -FROM @cdm_schema.specimen; -SELECT TOP 1 visit_detail_id, person_id, visit_detail_concept_id, visit_start_date, visit_start_datetime, visit_end_date, visit_end_datetime, visit_type_concept_id, provider_id, care_site_id, admitting_source_concept_id, discharge_to_concept_id, preceding_visit_detail_id, visit_source_value, visit_source_concept_id, admitting_source_value, discharge_to_source_value, visit_detail_parent_id, visit_occurrence_id -FROM @cdm_schema.visit_detail; -SELECT TOP 1 visit_occurrence_id, person_id, visit_concept_id, visit_start_date, visit_start_datetime, visit_end_date, visit_end_datetime, visit_type_concept_id, provider_id, care_site_id, visit_source_value, visit_source_concept_id, admitting_source_concept_id, admitting_source_value, discharge_to_concept_id, discharge_to_source_value, preceding_visit_occurrence_id -FROM @cdm_schema.visit_occurrence; -SELECT TOP 1 vocabulary_id, vocabulary_name, vocabulary_reference, vocabulary_version, vocabulary_concept_id -FROM @cdm_schema.vocabulary; diff --git a/src/main/resources/cdm/v6/cdm_V6_0.json b/src/main/resources/cdm/v6/cdm_V6_0.json new file mode 100644 index 00000000..06e2b248 --- /dev/null +++ b/src/main/resources/cdm/v6/cdm_V6_0.json @@ -0,0 +1,39 @@ +{ + "attribute_definition" : [ "attribute_definition_id", "attribute_name", "attribute_description", "attribute_type_concept_id", "attribute_syntax" ], + "care_site" : [ "care_site_id", "care_site_name", 
"place_of_service_concept_id", "location_id", "care_site_source_value", "place_of_service_source_value" ], + "cdm_source" : [ "cdm_source_name", "cdm_source_abbreviation", "cdm_holder", "source_description", "source_documentation_reference", "cdm_etl_reference", "source_release_date", "cdm_release_date", "cdm_version", "vocabulary_version" ], + "concept" : [ "concept_id", "concept_name", "domain_id", "vocabulary_id", "concept_class_id", "standard_concept", "concept_code", "valid_start_date", "valid_end_date", "invalid_reason" ], + "concept_ancestor" : [ "ancestor_concept_id", "descendant_concept_id", "min_levels_of_separation", "max_levels_of_separation" ], + "concept_class" : [ "concept_class_id", "concept_class_name", "concept_class_concept_id" ], + "concept_relationship" : [ "concept_id_1", "concept_id_2", "relationship_id", "valid_start_date", "valid_end_date", "invalid_reason" ], + "concept_synonym" : [ "concept_id", "concept_synonym_name", "language_concept_id" ], + "condition_era" : [ "condition_era_id", "person_id", "condition_concept_id", "condition_era_start_datetime", "condition_era_end_datetime", "condition_occurrence_count" ], + "condition_occurrence" : [ "condition_occurrence_id", "person_id", "condition_concept_id", "condition_start_date", "condition_start_datetime", "condition_end_date", "condition_end_datetime", "condition_type_concept_id", "condition_status_concept_id", "stop_reason", "provider_id", "visit_occurrence_id", "visit_detail_id", "condition_source_value", "condition_source_concept_id", "condition_status_source_value" ], + "cost" : [ "cost_id", "person_id", "cost_event_id", "cost_event_field_concept_id", "cost_concept_id", "cost_type_concept_id", "currency_concept_id", "cost", "incurred_date", "billed_date", "paid_date", "revenue_code_concept_id", "drg_concept_id", "cost_source_value", "cost_source_concept_id", "revenue_code_source_value", "drg_source_value", "payer_plan_period_id" ], + "device_exposure" : [ "device_exposure_id", 
"person_id", "device_concept_id", "device_exposure_start_date", "device_exposure_start_datetime", "device_exposure_end_date", "device_exposure_end_datetime", "device_type_concept_id", "unique_device_id", "quantity", "provider_id", "visit_occurrence_id", "visit_detail_id", "device_source_value", "device_source_concept_id" ], + "domain" : [ "domain_id", "domain_name", "domain_concept_id" ], + "dose_era" : [ "dose_era_id", "person_id", "drug_concept_id", "unit_concept_id", "dose_value", "dose_era_start_datetime", "dose_era_end_datetime" ], + "drug_era" : [ "drug_era_id", "person_id", "drug_concept_id", "drug_era_start_datetime", "drug_era_end_datetime", "drug_exposure_count", "gap_days" ], + "drug_exposure" : [ "drug_exposure_id", "person_id", "drug_concept_id", "drug_exposure_start_date", "drug_exposure_start_datetime", "drug_exposure_end_date", "drug_exposure_end_datetime", "verbatim_end_date", "drug_type_concept_id", "stop_reason", "refills", "quantity", "days_supply", "sig", "route_concept_id", "lot_number", "provider_id", "visit_occurrence_id", "visit_detail_id", "drug_source_value", "drug_source_concept_id", "route_source_value", "dose_unit_source_value" ], + "drug_strength" : [ "drug_concept_id", "ingredient_concept_id", "amount_value", "amount_unit_concept_id", "numerator_value", "numerator_unit_concept_id", "denominator_value", "denominator_unit_concept_id", "box_size", "valid_start_date", "valid_end_date", "invalid_reason" ], + "fact_relationship" : [ "domain_concept_id_1", "fact_id_1", "domain_concept_id_2", "fact_id_2", "relationship_concept_id" ], + "location" : [ "location_id", "address_1", "address_2", "city", "state", "zip", "county", "country", "location_source_value", "latitude", "longitude" ], + "location_history" : [ "location_history_id", "location_id", "relationship_type_concept_id", "domain_id", "entity_id", "start_date", "end_date" ], + "measurement" : [ "measurement_id", "person_id", "measurement_concept_id", "measurement_date", 
"measurement_datetime", "measurement_time", "measurement_type_concept_id", "operator_concept_id", "value_as_number", "value_as_concept_id", "unit_concept_id", "range_low", "range_high", "provider_id", "visit_occurrence_id", "visit_detail_id", "measurement_source_value", "measurement_source_concept_id", "unit_source_value", "value_source_value" ], + "metadata" : [ "metadata_concept_id", "metadata_type_concept_id", "name", "value_as_string", "value_as_concept_id", "metadata_date", "metadata_datetime" ], + "note" : [ "note_id", "person_id", "note_event_id", "note_event_field_concept_id", "note_date", "note_datetime", "note_type_concept_id", "note_class_concept_id", "note_title", "note_text", "encoding_concept_id", "language_concept_id", "provider_id", "visit_occurrence_id", "visit_detail_id", "note_source_value" ], + "note_nlp" : [ "note_nlp_id", "note_id", "section_concept_id", "snippet", "offset", "lexical_variant", "note_nlp_concept_id", "nlp_system", "nlp_date", "nlp_datetime", "term_exists", "term_temporal", "term_modifiers", "note_nlp_source_concept_id" ], + "observation" : [ "observation_id", "person_id", "observation_concept_id", "observation_date", "observation_datetime", "observation_type_concept_id", "value_as_number", "value_as_string", "value_as_concept_id", "qualifier_concept_id", "unit_concept_id", "provider_id", "visit_occurrence_id", "visit_detail_id", "observation_source_value", "observation_source_concept_id", "unit_source_value", "qualifier_source_value", "observation_event_id", "obs_event_field_concept_id", "value_as_datetime" ], + "observation_period" : [ "observation_period_id", "person_id", "observation_period_start_date", "observation_period_end_date", "period_type_concept_id" ], + "payer_plan_period" : [ "payer_plan_period_id", "person_id", "contract_person_id", "payer_plan_period_start_date", "payer_plan_period_end_date", "payer_concept_id", "plan_concept_id", "contract_concept_id", "sponsor_concept_id", "stop_reason_concept_id", 
"payer_source_value", "payer_source_concept_id", "plan_source_value", "plan_source_concept_id", "contract_source_value", "contract_source_concept_id", "sponsor_source_value", "sponsor_source_concept_id", "family_source_value", "stop_reason_source_value", "stop_reason_source_concept_id" ], + "person" : [ "person_id", "gender_concept_id", "year_of_birth", "month_of_birth", "day_of_birth", "birth_datetime", "death_datetime", "race_concept_id", "ethnicity_concept_id", "location_id", "provider_id", "care_site_id", "person_source_value", "gender_source_value", "gender_source_concept_id", "race_source_value", "race_source_concept_id", "ethnicity_source_value", "ethnicity_source_concept_id" ], + "procedure_occurrence" : [ "procedure_occurrence_id", "person_id", "procedure_concept_id", "procedure_date", "procedure_datetime", "procedure_type_concept_id", "modifier_concept_id", "quantity", "provider_id", "visit_occurrence_id", "visit_detail_id", "procedure_source_value", "procedure_source_concept_id", "modifier_source_value" ], + "provider" : [ "provider_id", "provider_name", "npi", "dea", "specialty_concept_id", "care_site_id", "year_of_birth", "gender_concept_id", "provider_source_value", "specialty_source_value", "specialty_source_concept_id", "gender_source_value", "gender_source_concept_id" ], + "relationship" : [ "relationship_id", "relationship_name", "is_hierarchical", "defines_ancestry", "reverse_relationship_id", "relationship_concept_id" ], + "source_to_concept_map" : [ "source_code", "source_concept_id", "source_vocabulary_id", "source_code_description", "target_concept_id", "target_vocabulary_id", "valid_start_date", "valid_end_date", "invalid_reason" ], + "specimen" : [ "specimen_id", "person_id", "specimen_concept_id", "specimen_type_concept_id", "specimen_date", "specimen_datetime", "quantity", "unit_concept_id", "anatomic_site_concept_id", "disease_status_concept_id", "specimen_source_id", "specimen_source_value", "unit_source_value", 
"anatomic_site_source_value", "disease_status_source_value" ], + "survey_conduct" : [ "survey_conduct_id", "person_id", "survey_concept_id", "survey_start_date", "survey_start_datetime", "survey_end_date", "survey_end_datetime", "provider_id", "assisted_concept_id", "respondent_type_concept_id", "timing_concept_id", "collection_method_concept_id", "assisted_source_value", "respondent_type_source_value", "timing_source_value", "collection_method_source_value", "survey_source_value", "survey_source_concept_id", "survey_source_identifier", "validated_survey_concept_id", "validated_survey_source_value", "survey_version_number", "visit_occurrence_id", "visit_detail_id", "response_visit_occurrence_id" ], + "visit_detail" : [ "visit_detail_id", "person_id", "visit_detail_concept_id", "visit_detail_start_date", "visit_detail_start_datetime", "visit_detail_end_date", "visit_detail_end_datetime", "visit_detail_type_concept_id", "provider_id", "care_site_id", "discharge_to_concept_id", "admitted_from_concept_id", "admitted_from_source_value", "visit_detail_source_value", "visit_detail_source_concept_id", "discharge_to_source_value", "preceding_visit_detail_id", "visit_detail_parent_id", "visit_occurrence_id" ], + "visit_occurrence" : [ "visit_occurrence_id", "person_id", "visit_concept_id", "visit_start_date", "visit_start_datetime", "visit_end_date", "visit_end_datetime", "visit_type_concept_id", "provider_id", "care_site_id", "visit_source_value", "visit_source_concept_id", "admitted_from_concept_id", "admitted_from_source_value", "discharge_to_source_value", "discharge_to_concept_id", "preceding_visit_occurrence_id" ], + "vocabulary" : [ "vocabulary_id", "vocabulary_name", "vocabulary_reference", "vocabulary_version", "vocabulary_concept_id" ] +} \ No newline at end of file