Adds support for Apache Hive LLAP (#1418)
Fixes: #1168 Adds support for Apache Hive LLAP
wivern authored Feb 10, 2020
1 parent 88c203b commit 955ed3b
Showing 18 changed files with 407 additions and 30 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -35,4 +35,6 @@ It was chosen to use embedded PG instead of H2 for unit tests since H2 doesn't s
- Only Non-SNAPSHOT dependencies should be presented in POM.xml on release branches/tags.

## License
OHDSI WebAPI is licensed under Apache License 2.0
OHDSI WebAPI is licensed under Apache License 2.0


104 changes: 100 additions & 4 deletions pom.xml
@@ -144,7 +144,7 @@
<server.ssl.key-store-password></server.ssl.key-store-password>
<server.ssl.key-password></server.ssl.key-password>

<arachne.version>1.15.0-SNAPSHOT</arachne.version>
<arachne.version>1.16.0-SNAPSHOT</arachne.version>
<jersey-media-multipart.version>2.25.1</jersey-media-multipart.version>
<execution.invalidation.period>600000</execution.invalidation.period>
<execution.invalidation.maxage.hours>12</execution.invalidation.maxage.hours>
@@ -192,13 +192,15 @@
<cdm.result.cache.warming.enable>true</cdm.result.cache.warming.enable>
<cache.generation.invalidAfterDays>7</cache.generation.invalidAfterDays>
<cache.generation.cleanupInterval>60000</cache.generation.cleanupInterval>
<SqlRender.version>1.6.3-SNAPSHOT</SqlRender.version>
<hive-jdbc.version>3.1.2</hive-jdbc.version>
</properties>
<build>
<finalName>WebAPI</finalName>
<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
<testSourceDirectory>${basedir}/src/test/java</testSourceDirectory>
<outputDirectory>${basedir}/target/classes</outputDirectory>
<testOutputDirectory>${basedir}/target/test-classes</testOutputDirectory>
<testOutputDirectory>${basedir}/target/test-classes</testOutputDirectory>
<resources>
<resource>
<directory>src/main/resources</directory>
@@ -276,7 +278,7 @@
<compilerArgs>
<!-- Prevents recompilation due to missing package-info.class, see MCOMPILER-205 -->
<arg>-Xpkginfo:always</arg>
</compilerArgs>
</compilerArgs>
</configuration>
</plugin>
<plugin>
@@ -507,7 +509,7 @@
<dependency>
<groupId>org.ohdsi.sql</groupId>
<artifactId>SqlRender</artifactId>
<version>1.6.3-SNAPSHOT</version>
<version>${SqlRender.version}</version>
</dependency>
<dependency>
<groupId>commons-dbutils</groupId>
@@ -781,6 +783,10 @@
<groupId>org.ohdsi</groupId>
<artifactId>featureExtraction</artifactId>
</exclusion>
<exclusion>
<groupId>org.ohdsi.sql</groupId>
<artifactId>SqlRender</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
@@ -1118,5 +1124,95 @@
</plugins>
</build>
</profile>
<profile>
<id>webapi-hive</id>
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive-jdbc.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.tomcat</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
<exclusion>
<artifactId>servlet-api</artifactId>
<groupId>javax.servlet</groupId>
</exclusion>
<exclusion>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP</artifactId>
</exclusion>
<exclusion>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP-java7</artifactId>
</exclusion>
<exclusion>
<groupId>javax.ws.rs</groupId>
<artifactId>javax.ws.rs-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-client</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
<exclusion>
<groupId>com.tdunning</groupId>
<artifactId>json</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
<version>1.19.4</version>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
<version>1.19.4</version>
<exclusions>
<exclusion>
<groupId>javax.ws.rs</groupId>
<artifactId>jsr311-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-client</artifactId>
<version>1.19.4</version>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-json</artifactId>
<version>1.19.4</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>
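As with the BigQuery and Impala extras below, Hive support is opt-in at build time through the new Maven profile. A build sketch (the webapi-hive profile id comes from the diff above; pairing it with the webapi-postgresql profile for the WebAPI database is an assumption carried over from the other extras' READMEs):

* mvn -Pwebapi-postgresql,webapi-hive clean package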
2 changes: 1 addition & 1 deletion src/main/extras/bigquery/README.md
@@ -1,6 +1,6 @@
To build WebAPI with Google BigQuery support do the following:
1. Go to https://cloud.google.com/bigquery/partners/simba-drivers/ and
2. Download the JDBC 4.2 driver
3. Unpack archive and and set the bigquery.classpath property in your settings.xml to the unpacked archive location (ie: C://downloads/bigquery) inside the webapi-bigquery profile.
3. Unpack archive and set the bigquery.classpath property in your settings.xml to the unpacked archive location (ie: C://downloads/bigquery) inside the webapi-bigquery profile.
4. Build WebAPI with webapi-bigquery profile.
* mvn -Pwebapi-postgresql,webapi-bigquery clean package
4 changes: 2 additions & 2 deletions src/main/extras/impala/README.md
@@ -1,7 +1,7 @@
To build WebAPI with Impala support do the following:
1. Go to the https://www.cloudera.com/downloads/connectors/impala/jdbc/2-6-15.html
2. Register to clouder if you did not registered earlier or sign in to your Cloudera account
2. Register with Cloudera if you have not registered before, or sign in to your Cloudera account
3. Download the latest Impala JDBC drivers
4. Unpack archive and and set the impala.classpath property in your settings.xml to the unpacked archive location (ie: C://downloads/impalaJDBC) inside the webapi-impala profile.
4. Unpack archive and set the impala.classpath property in your settings.xml to the unpacked archive location (ie: C://downloads/impalaJDBC) inside the webapi-impala profile.
5. Build WebAPI with webapi-impala profile.
* mvn -Pwebapi-postgresql,webapi-impala clean package
2 changes: 1 addition & 1 deletion src/main/java/org/ohdsi/webapi/DataAccessConfig.java
@@ -78,7 +78,7 @@ public DataSource primaryDataSource() {
//note autocommit defaults vary across vendors. use provided @Autowired TransactionTemplate

String[] supportedDrivers;
supportedDrivers = new String[]{"org.postgresql.Driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver", "oracle.jdbc.driver.OracleDriver", "com.amazon.redshift.jdbc.Driver", "com.cloudera.impala.jdbc.Driver", "net.starschema.clouddb.jdbc.BQDriver", "org.netezza.Driver", "com.simba.googlebigquery.jdbc42.Driver"};
supportedDrivers = new String[]{"org.postgresql.Driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver", "oracle.jdbc.driver.OracleDriver", "com.amazon.redshift.jdbc.Driver", "com.cloudera.impala.jdbc.Driver", "net.starschema.clouddb.jdbc.BQDriver", "org.netezza.Driver", "com.simba.googlebigquery.jdbc42.Driver", "org.apache.hive.jdbc.HiveDriver"};
for (String driverName : supportedDrivers) {
try {
Class.forName(driverName);
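Once org.apache.hive.jdbc.HiveDriver is on the classpath, a Hive source is reachable over plain JDBC. A minimal connectivity sketch, assuming a HiveServer2 instance; the host, port, database, and credentials are illustrative, not taken from the commit:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveConnectionSketch {
    public static void main(String[] args) throws Exception {
        // The same availability check the loop above performs per driver.
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        // jdbc:hive2:// is the HiveServer2 URL scheme; localhost:10000/default is assumed.
        try (Connection con = DriverManager.getConnection(
                "jdbc:hive2://localhost:10000/default", "hive", "");
             Statement st = con.createStatement();
             ResultSet rs = st.executeQuery("SELECT 1")) {
            while (rs.next()) {
                System.out.println(rs.getInt(1));
            }
        }
    }
}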
@@ -12,6 +12,7 @@
import org.ohdsi.circe.helper.ResourceHelper;
import org.ohdsi.featureExtraction.FeatureExtraction;
import org.ohdsi.hydra.Hydra;
import org.ohdsi.sql.SqlSplit;
import org.ohdsi.sql.SqlTranslate;
import org.ohdsi.webapi.JobInvalidator;
import org.ohdsi.webapi.cohortcharacterization.converter.SerializedCcToCcConverter;
@@ -825,6 +826,10 @@ public List<CcPrevalenceStat> getPrevalenceStatsByGenerationId(Long id, Long ana
String prevalenceStats = sourceAwareSqlRender.renderSql(source.getSourceId(), QUERY_PREVALENCE_STATS, PREVALENCE_STATS_PARAMS,
new String[]{ cdmSchema, resultSchema, String.valueOf(id), String.valueOf(analysisId), String.valueOf(cohortId), String.valueOf(covariateId) });
String translatedSql = SqlTranslate.translateSql(prevalenceStats, source.getSourceDialect(), SessionUtils.sessionId(), tempSchema);
String[] stmts = SqlSplit.splitSql(translatedSql);
if (stmts.length == 1) { // Some DBMSes, like Hive, fail when a single statement ends with a semicolon
translatedSql = StringUtils.removeEnd(translatedSql.trim(), ";");
}
return getSourceJdbcTemplate(source).query(translatedSql, (rs, rowNum) -> {
CcPrevalenceStat stat = new CcPrevalenceStat();
stat.setAvg(rs.getDouble("stat_value"));
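The guard added here can be read as a reusable rule. A sketch that factors it out, using only the SqlSplit and StringUtils helpers already imported above (the trimTerminalSemicolon name is hypothetical):

import org.apache.commons.lang3.StringUtils;
import org.ohdsi.sql.SqlSplit;

class HiveSqlUtils {
    // Hive rejects a lone statement that ends with ';', so drop the terminator
    // only when the translated script contains a single statement.
    static String trimTerminalSemicolon(String translatedSql) {
        String[] stmts = SqlSplit.splitSql(translatedSql);
        return stmts.length == 1
                ? StringUtils.removeEnd(translatedSql.trim(), ";")
                : translatedSql;
    }
}

// trimTerminalSemicolon("SELECT 1;") returns "SELECT 1";
// a multi-statement script passes through unchanged.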
@@ -1,5 +1,8 @@
package org.ohdsi.webapi.cohortcharacterization;

import com.odysseusinc.arachne.commons.types.DBMSType;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import org.ohdsi.circe.helper.ResourceHelper;
import org.ohdsi.sql.SqlTranslate;
import org.ohdsi.webapi.source.SourceService;
@@ -44,8 +47,12 @@ private Object doTask(JobParameters parameters) {
final Source source = sourceService.findBySourceId(sourceId);
final String resultsQualifier = SourceUtils.getResultsQualifier(source);
final String tempQualifier = SourceUtils.getTempQualifier(source, resultsQualifier);
jdbcTemplate.execute(SqlTranslate.translateSql(sql, source.getSourceDialect(), null, tempQualifier));
String toRemove = SqlTranslate.translateSql(sql, source.getSourceDialect(), null, tempQualifier);

if (Objects.equals(DBMSType.HIVE.getOhdsiDB(), source.getSourceDialect())){
toRemove = StringUtils.remove(toRemove, ';');
}
jdbcTemplate.execute(toRemove);
return null;
}

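The dialect test on DBMSType.HIVE recurs across the classes touched by this commit; a sketch of a shared predicate under the same imports (the isHiveDialect name and its placement are hypothetical):

import com.odysseusinc.arachne.commons.types.DBMSType;
import java.util.Objects;
import org.ohdsi.webapi.source.Source;

class DialectUtils {
    // True when the source declares the OHDSI dialect identifier for Hive.
    static boolean isHiveDialect(Source source) {
        return Objects.equals(DBMSType.HIVE.getOhdsiDB(), source.getSourceDialect());
    }
}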
@@ -91,7 +91,7 @@ protected String[] prepareQueries(ChunkContext chunkContext, CancelableJdbcTempl
*
* Therefore, there are two ways:
* - either precisely group SQLs into statements so that temp tables aren't re-used in a single statement,
* - or use ‘permenant temporary tables’
* - or use ‘permanent temporary tables’
*
* The second option looks better since such SQL could be exported and executed manually,
* which is not the case with the first option.
Expand Down
@@ -169,11 +169,15 @@ public SimpleJobBuilder buildJobForExecutionEngineBasedAnalysisTasklet(String an
Step waitCallbackStep = stepBuilderFactory.get(analysisTypeName + ".waitForCallback")
.tasklet(callbackTasklet)
.build();

DropCohortTableListener dropCohortTableListener = new DropCohortTableListener(getSourceJdbcTemplate(source),
transactionTemplate, sourceService, sourceAwareSqlRender);

return jobBuilders.get(analysisTypeName)
.start(createAnalysisExecutionStep)
.next(runExecutionStep)
.next(waitCallbackStep)
.listener(dropCohortTableListener)
.listener(new AutoremoveJobListener(jobService));
}
}
@@ -2,6 +2,7 @@

import com.cosium.spring.data.jpa.entity.graph.domain.EntityGraph;
import com.google.common.base.MoreObjects;
import com.odysseusinc.arachne.commons.types.DBMSType;
import org.hibernate.Hibernate;
import org.ohdsi.circe.helper.ResourceHelper;
import org.ohdsi.sql.SqlRender;
@@ -325,7 +326,8 @@ public String buildAnalysisSql(Long generationId, PathwayAnalysisEntity pathwayA
"pathway_target_cohort_id",
"max_depth",
"combo_window",
"allow_repeats"
"allow_repeats",
"isHive"
};
String[] values = new String[]{
generationId.toString(),
@@ -336,7 +338,8 @@
tc.getCohortDefinition().getId().toString(),
pathwayAnalysis.getMaxDepth().toString(),
MoreObjects.firstNonNull(pathwayAnalysis.getCombinationWindow(), 1).toString(),
String.valueOf(pathwayAnalysis.isAllowRepeats())
String.valueOf(pathwayAnalysis.isAllowRepeats()),
String.valueOf(Objects.equals(DBMSType.HIVE.getOhdsiDB(), source.getSourceDialect()))
};

String renderedSql = SqlRender.renderSql(analysisSql, params, values);
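The new isHive value reaches the pathway template through SqlRender parameter substitution, so the SQL itself can branch per dialect. A sketch of the mechanism with an illustrative template (the table and clause are not the actual pathway analysis SQL):

import org.ohdsi.sql.SqlRender;

// SqlRender replaces @parameters and evaluates {condition} ? {body} blocks.
String template = "SELECT * FROM @results_schema.pathway_events "
        + "{@isHive == 'true'} ? {CLUSTER BY event_cohort_id}";
String rendered = SqlRender.renderSql(template,
        new String[]{"results_schema", "isHive"},
        new String[]{"results", "true"});
// rendered: "SELECT * FROM results.pathway_events CLUSTER BY event_cohort_id"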
21 changes: 18 additions & 3 deletions src/main/java/org/ohdsi/webapi/service/DDLService.java
@@ -20,11 +20,13 @@

import static org.ohdsi.webapi.service.SqlRenderService.translateSQL;

import com.odysseusinc.arachne.commons.types.DBMSType;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
@@ -89,9 +91,14 @@ public class DDLService {
"/ddl/results/pathway_analysis_stats.sql"
);

private static final String INIT_HERACLES_PERIODS = "/ddl/results/init_heracles_periods.sql";

public static final Collection<String> RESULT_INIT_FILE_PATHS = Arrays.asList(
"/ddl/results/init_heracles_analysis.sql",
"/ddl/results/init_heracles_periods.sql"
"/ddl/results/init_heracles_analysis.sql", INIT_HERACLES_PERIODS
);

public static final Collection<String> HIVE_RESULT_INIT_FILE_PATHS = Arrays.asList(
"/ddl/results/init_hive_heracles_analysis.sql", INIT_HERACLES_PERIODS
);

public static final Collection<String> INIT_CONCEPT_HIERARCHY_FILE_PATHS = Arrays.asList(
@@ -136,7 +143,15 @@ public String generateResultSQL(
put(TEMP_SCHEMA, oracleTempSchema);
}};

return generateSQL(dialect, params, resultDDLFilePaths, RESULT_INIT_FILE_PATHS, RESULT_INDEX_FILE_PATHS);
return generateSQL(dialect, params, resultDDLFilePaths, getResultInitFilePaths(dialect), RESULT_INDEX_FILE_PATHS);
}

private Collection<String> getResultInitFilePaths(String dialect) {
if (Objects.equals(DBMSType.HIVE.getOhdsiDB(), dialect)) {
return HIVE_RESULT_INIT_FILE_PATHS;
} else {
return RESULT_INIT_FILE_PATHS;
}
}

@GET
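A usage sketch of the new dispatch; that DBMSType.HIVE.getOhdsiDB() resolves to the string "hive" is an assumption here, while the method and constants come from the diff above:

// With the Hive dialect, generateResultSQL swaps in the Hive-specific
// analysis init script while still sharing init_heracles_periods.sql.
Collection<String> initPaths = getResultInitFilePaths("hive");
// assumed result: [/ddl/results/init_hive_heracles_analysis.sql,
//                  /ddl/results/init_heracles_periods.sql]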
10 changes: 6 additions & 4 deletions src/main/java/org/ohdsi/webapi/source/Source.java
@@ -118,12 +118,14 @@ public String getTableQualifier(DaimonType daimonType) {
}

public String getTableQualifierOrNull(DaimonType daimonType) {
for (SourceDaimon sourceDaimon : this.getDaimons()) {
if (sourceDaimon.getDaimonType() == daimonType) {
return sourceDaimon.getTableQualifier();
if (this.getDaimons() != null){
for (SourceDaimon sourceDaimon : this.getDaimons()) {
if (sourceDaimon.getDaimonType() == daimonType) {
return sourceDaimon.getTableQualifier();
}
}
}
return null;
return null;
}

public String getSourceKey() {
2 changes: 2 additions & 0 deletions src/main/resources/ddl/results/heracles_results.sql
@@ -1,3 +1,5 @@
--HINT PARTITION(cohort_definition_id int)
--HINT BUCKET(analysis_id, 64)
IF OBJECT_ID('@results_schema.heracles_results', 'U') IS NULL
create table @results_schema.heracles_results
(
2 changes: 2 additions & 0 deletions src/main/resources/ddl/results/heracles_results_dist.sql
@@ -1,3 +1,5 @@
--HINT PARTITION(cohort_definition_id int)
--HINT BUCKET(analysis_id, 64)
IF OBJECT_ID('@results_schema.heracles_results_dist', 'U') IS NULL
create table @results_schema.heracles_results_dist
(
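The new --HINT lines are SqlRender hints: dialects that cannot use them ignore the comments, while the Hive translation rules can turn them into partitioned, bucketed table DDL. A sketch of how translation would consume them; the "hive" dialect key and the exact output clauses (PARTITIONED BY, CLUSTERED BY ... INTO 64 BUCKETS) are assumptions, not quoted from SqlRender:

import org.ohdsi.sql.SqlTranslate;

String ddl =
        "--HINT PARTITION(cohort_definition_id int)\n"
      + "--HINT BUCKET(analysis_id, 64)\n"
      + "CREATE TABLE results.heracles_results (cohort_definition_id int, analysis_id int);";
// The 4-argument overload matches the calls elsewhere in this commit;
// sessionId and tempSchema are not needed for plain DDL, hence null.
String hiveDdl = SqlTranslate.translateSql(ddl, "hive", null, null);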