From c86d2eb62eeb999e3c40a05ab0e0e216eab5e8f5 Mon Sep 17 00:00:00 2001 From: Kevin Ge <46853779+KevinGe00@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:57:02 -0400 Subject: [PATCH] [Coral-Service] Add rewriteType field to translation endpoint (#455) --- README.md | 10 +++-- .../controller/TranslationController.java | 44 +++++++++++-------- .../controller/VisualizationController.java | 7 +-- .../entity/TranslateRequestBody.java | 9 ++++ .../coral/coralservice/utils/CommonUtils.java | 14 ++++++ .../coralservice/utils/IncrementalUtils.java | 27 ++++++++++-- .../coral/coralservice/utils/RewriteType.java | 2 +- .../coralservice/utils/TranslationUtils.java | 26 +++++++++++ .../utils/VisualizationUtils.java | 8 +--- 9 files changed, 109 insertions(+), 38 deletions(-) create mode 100644 coral-service/src/main/java/com/linkedin/coral/coralservice/utils/CommonUtils.java diff --git a/README.md b/README.md index bc599a580..915390823 100644 --- a/README.md +++ b/README.md @@ -98,9 +98,10 @@ Please see the [Contribution Agreement](CONTRIBUTING.md). 
#### /api/translations/translate A **POST** API which takes JSON request body containing following parameters and returns the translated query: -- `fromLanguage`: Input dialect (e.g., spark, trino, hive -- see below for supported inputs) -- `toLanguage`: Output dialect (e.g., spark, trino, hive -- see below for supported outputs) +- `sourceLanguage`: Input dialect (e.g., spark, trino, hive -- see below for supported inputs) +- `targetLanguage`: Output dialect (e.g., spark, trino, hive -- see below for supported outputs) - `query`: SQL query to translate between two dialects +- [Optional] `rewriteType`: Type of Coral IR rewrite (e.g., incremental) #### /api/catalog-ops/execute A **POST** API which takes a SQL statement to create a database/table/view in the local metastore @@ -195,8 +196,8 @@ Creation successful curl --header "Content-Type: application/json" \ --request POST \ --data '{ - "fromLanguage":"hive", - "toLanguage":"trino", + "sourceLanguage":"hive", + "targetLanguage":"trino", "query":"SELECT * FROM db1.airport" }' \ http://localhost:8080/api/translations/translate @@ -216,3 +217,4 @@ FROM "db1"."airport" 2. Hive to Spark 3. Trino to Spark Note: During Trino to Spark translations, views referenced in queries are considered to be defined in HiveQL and hence cannot be used when translating a view from Trino. Currently, only referencing base tables is supported in Trino queries. This translation path is currently a POC and may need further improvements. +4. 
Spark to Trino diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java index f754fc888..5415d895c 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/TranslationController.java @@ -25,7 +25,9 @@ import com.linkedin.coral.coralservice.entity.IncrementalRequestBody; import com.linkedin.coral.coralservice.entity.IncrementalResponseBody; import com.linkedin.coral.coralservice.entity.TranslateRequestBody; +import com.linkedin.coral.coralservice.utils.RewriteType; +import static com.linkedin.coral.coralservice.utils.CommonUtils.*; import static com.linkedin.coral.coralservice.utils.CoralProvider.*; import static com.linkedin.coral.coralservice.utils.IncrementalUtils.*; import static com.linkedin.coral.coralservice.utils.TranslationUtils.*; @@ -59,32 +61,35 @@ public ResponseEntity translate(@RequestBody TranslateRequestBody translateReque final String sourceLanguage = translateRequestBody.getSourceLanguage(); final String targetLanguage = translateRequestBody.getTargetLanguage(); final String query = translateRequestBody.getQuery(); + final RewriteType rewriteType = translateRequestBody.getRewriteType(); + // TODO: Allow translations between the same language if (sourceLanguage.equalsIgnoreCase(targetLanguage)) { return ResponseEntity.status(HttpStatus.BAD_REQUEST) .body("Please choose different languages to translate between.\n"); } + if (!isValidSourceLanguage(sourceLanguage)) { + return ResponseEntity.status(HttpStatus.BAD_REQUEST) + .body("Currently, only Hive, Trino and Spark are supported as source languages.\n"); + } + String translatedSql = null; try { - // TODO: add more translations once n-to-one-to-n is completed - // From Trino - if 
(sourceLanguage.equalsIgnoreCase("trino")) { - // To Spark - if (targetLanguage.equalsIgnoreCase("spark")) { - translatedSql = translateTrinoToSpark(query); - } - } - // From Hive - else if (sourceLanguage.equalsIgnoreCase("hive")) { - // To Spark - if (targetLanguage.equalsIgnoreCase("spark")) { - translatedSql = translateHiveToSpark(query); - } - // To Trino - else if (targetLanguage.equalsIgnoreCase("trino")) { - translatedSql = translateHiveToTrino(query); + if (rewriteType == null) { + // Invalid rewriteType values are deserialized as null + translatedSql = translateQuery(query, sourceLanguage, targetLanguage); + } else { + switch (rewriteType) { + case INCREMENTAL: + translatedSql = getIncrementalQuery(query, sourceLanguage, targetLanguage); + break; + case DATAMASKING: + case NONE: + default: + translatedSql = translateQuery(query, sourceLanguage, targetLanguage); + break; } } } catch (Throwable t) { @@ -110,14 +115,15 @@ public ResponseEntity getIncrementalInfo(@RequestBody IncrementalRequestBody inc throws JSONException { final String query = incrementalRequestBody.getQuery(); final List tableNames = incrementalRequestBody.getTableNames(); - final String language = incrementalRequestBody.getLanguage(); + final String language = incrementalRequestBody.getLanguage(); // source language // Response will contain incremental query and incremental table names IncrementalResponseBody incrementalResponseBody = new IncrementalResponseBody(); incrementalResponseBody.setIncrementalQuery(null); try { if (language.equalsIgnoreCase("spark")) { - String incrementalQuery = getSparkIncrementalQueryFromUserSql(query); + // TODO: rename language to sourceLanguage and add a targetLanguage field IncrementalRequestBody to use here + String incrementalQuery = getIncrementalQuery(query, language, "spark"); for (String tableName : tableNames) { /* Generate underscore delimited and incremental table names Table name: db.t1 diff --git 
a/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/VisualizationController.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/VisualizationController.java index b69bc577a..ba5d95a5e 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/VisualizationController.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/controller/VisualizationController.java @@ -28,6 +28,7 @@ import com.linkedin.coral.coralservice.utils.RewriteType; import com.linkedin.coral.coralservice.utils.VisualizationUtils; +import static com.linkedin.coral.coralservice.utils.CommonUtils.*; import static com.linkedin.coral.coralservice.utils.VisualizationUtils.*; @@ -43,9 +44,9 @@ public ResponseEntity getIRVisualizations(@RequestBody VisualizationRequestBody final String query = visualizationRequestBody.getQuery(); final RewriteType rewriteType = visualizationRequestBody.getRewriteType(); - if (!visualizationUtils.isValidSourceLanguage(sourceLanguage)) { - return ResponseEntity.status(HttpStatus.BAD_REQUEST) - .body("Currently, only Hive, Spark, and Trino are supported as engines to generate graphs using.\n"); + if (!isValidSourceLanguage(sourceLanguage)) { + return ResponseEntity.status(HttpStatus.BAD_REQUEST).body( + "Currently, only Hive, Spark, and Trino SQL are supported as source languages for Coral IR visualization. 
\n"); } // A list of UUIDs in this order of: diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/entity/TranslateRequestBody.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/entity/TranslateRequestBody.java index c02ab51e1..37bad37f9 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/entity/TranslateRequestBody.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/entity/TranslateRequestBody.java @@ -5,11 +5,16 @@ */ package com.linkedin.coral.coralservice.entity; +import com.linkedin.coral.coralservice.utils.RewriteType; + + public class TranslateRequestBody { private String sourceLanguage; private String targetLanguage; private String query; + private RewriteType rewriteType; + public String getSourceLanguage() { return sourceLanguage; } @@ -21,4 +26,8 @@ public String getTargetLanguage() { public String getQuery() { return query; } + + public RewriteType getRewriteType() { + return rewriteType; + } } diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/CommonUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/CommonUtils.java new file mode 100644 index 000000000..88dd1339a --- /dev/null +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/CommonUtils.java @@ -0,0 +1,14 @@ +/** + * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.coralservice.utils; + +public class CommonUtils { + + public static boolean isValidSourceLanguage(String sourceLanguage) { + return sourceLanguage.equalsIgnoreCase("trino") || sourceLanguage.equalsIgnoreCase("hive") + || sourceLanguage.equalsIgnoreCase("spark"); + } +} diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java index a54a125ce..3ee6687f7 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/IncrementalUtils.java @@ -10,17 +10,36 @@ import com.linkedin.coral.hive.hive2rel.HiveToRelConverter; import com.linkedin.coral.incremental.RelNodeIncrementalTransformer; import com.linkedin.coral.spark.CoralSpark; +import com.linkedin.coral.trino.rel2trino.RelToTrinoConverter; +import com.linkedin.coral.trino.trino2rel.TrinoToRelConverter; import static com.linkedin.coral.coralservice.utils.CoralProvider.*; public class IncrementalUtils { + public static String getIncrementalQuery(String query, String sourceLanguage, String targetLanguage) { + RelNode originalNode; + + switch (sourceLanguage.toLowerCase()) { + case "trino": + originalNode = new TrinoToRelConverter(hiveMetastoreClient).convertSql(query); + break; + case "hive": + default: + originalNode = new HiveToRelConverter(hiveMetastoreClient).convertSql(query); + break; + } - public static String getSparkIncrementalQueryFromUserSql(String query) { - RelNode originalNode = new HiveToRelConverter(hiveMetastoreClient).convertSql(query); RelNode incrementalRelNode = RelNodeIncrementalTransformer.convertRelIncremental(originalNode); - CoralSpark coralSpark = CoralSpark.create(incrementalRelNode, hiveMetastoreClient); - return coralSpark.getSparkSql(); + + switch (targetLanguage.toLowerCase()) { + case "trino": + default: + return new 
RelToTrinoConverter(hiveMetastoreClient).convert(incrementalRelNode); + case "spark": + CoralSpark coralSpark = CoralSpark.create(incrementalRelNode, hiveMetastoreClient); + return coralSpark.getSparkSql(); + } } } diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/RewriteType.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/RewriteType.java index 5baaf9f1a..6f5ee3710 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/RewriteType.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/RewriteType.java @@ -25,7 +25,7 @@ public String toString() { } @JsonCreator - public static RewriteType getDepartmentFromCode(String value) { + public static RewriteType getRewriteTypeFromCode(String value) { for (RewriteType type : RewriteType.values()) { if (type.toString().equals(value)) { return type; diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java index 67dead01f..d0987cd39 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/TranslationUtils.java @@ -33,4 +33,30 @@ public static String translateHiveToSpark(String query) { CoralSpark coralSpark = CoralSpark.create(relNode, hiveMetastoreClient); return coralSpark.getSparkSql(); } + + public static String translateQuery(String query, String sourceLanguage, String targetLanguage) { + String translatedSql = null; + + // TODO: add more translations once n-to-one-to-n is completed + // From Trino + if (sourceLanguage.equalsIgnoreCase("trino")) { + // To Spark + if (targetLanguage.equalsIgnoreCase("spark")) { + translatedSql = translateTrinoToSpark(query); + } + } + // From Hive or Spark + else if (sourceLanguage.equalsIgnoreCase("hive") || 
sourceLanguage.equalsIgnoreCase("spark")) { + // To Spark + if (targetLanguage.equalsIgnoreCase("spark")) { + translatedSql = translateHiveToSpark(query); + } + // To Trino + else if (targetLanguage.equalsIgnoreCase("trino")) { + translatedSql = translateHiveToTrino(query); + } + } + + return translatedSql; + } } diff --git a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java index 124fccff1..f9a52c540 100644 --- a/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java +++ b/coral-service/src/main/java/com/linkedin/coral/coralservice/utils/VisualizationUtils.java @@ -27,10 +27,6 @@ public static File getImageDir() { return new File(System.getProperty("java.io.tmpdir") + "/images" + UUID.randomUUID()); } - public boolean isValidSourceLanguage(String sourceLanguage) { - return sourceLanguage.equalsIgnoreCase("trino") || sourceLanguage.equalsIgnoreCase("hive"); - } - public ArrayList generateIRVisualizations(String query, String sourceLanguage, File imageDir, RewriteType rewriteType) { ArrayList imageIDList = new ArrayList<>(); @@ -89,10 +85,8 @@ private RelNode getRelNode(String query, String sourceLanguage) { RelNode relNode = null; if (sourceLanguage.equalsIgnoreCase("trino")) { relNode = new TrinoToRelConverter(hiveMetastoreClient).convertSql(query); - } else if (sourceLanguage.equalsIgnoreCase("hive")) { + } else if (sourceLanguage.equalsIgnoreCase("hive") || sourceLanguage.equalsIgnoreCase("spark")) { relNode = new HiveToRelConverter(hiveMetastoreClient).convertSql(query); - } else if (sourceLanguage.equalsIgnoreCase("spark")) { - } return relNode;