Skip to content

Commit

Permalink
[Coral-Trino] Migrate FROM_UNIXTIME and FROM_UTC_TIMESTAMP (#426)
Browse files Browse the repository at this point in the history
* initial commit for timestamp op migrations

* refactored transformations

* rename SqlShuttle class

* enable test and rename var
  • Loading branch information
aastha25 authored Jun 27, 2023
1 parent 6a5d8f2 commit e808370
Show file tree
Hide file tree
Showing 8 changed files with 241 additions and 99 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ protected SqlRexConvertletTable getConvertletTable() {
}

@Override
protected SqlValidator getSqlValidator() {
public SqlValidator getSqlValidator() {
return sqlValidator;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
*/
package com.linkedin.coral.trino.rel2trino;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
Expand All @@ -28,7 +27,6 @@
import org.apache.calcite.rel.logical.LogicalSort;
import org.apache.calcite.rel.logical.LogicalUnion;
import org.apache.calcite.rel.logical.LogicalValues;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
Expand All @@ -42,12 +40,10 @@
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;

import com.linkedin.coral.com.google.common.collect.ImmutableList;
import com.linkedin.coral.common.functions.FunctionReturnTypes;
import com.linkedin.coral.common.functions.GenericProjectFunction;
import com.linkedin.coral.trino.rel2trino.functions.GenericProjectToTrinoConverter;

import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.MULTIPLY;
import static org.apache.calcite.sql.type.ReturnTypes.explicit;
import static org.apache.calcite.sql.type.SqlTypeName.*;

Expand Down Expand Up @@ -175,20 +171,6 @@ public RexNode visitCall(RexCall call) {

final String operatorName = call.getOperator().getName();

if (operatorName.equalsIgnoreCase("from_utc_timestamp")) {
Optional<RexNode> modifiedCall = visitFromUtcTimestampCall(call);
if (modifiedCall.isPresent()) {
return modifiedCall.get();
}
}

if (operatorName.equalsIgnoreCase("from_unixtime")) {
Optional<RexNode> modifiedCall = visitFromUnixtime(call);
if (modifiedCall.isPresent()) {
return modifiedCall.get();
}
}

if (operatorName.equalsIgnoreCase("cast")) {
Optional<RexNode> modifiedCall = visitCast(call);
if (modifiedCall.isPresent()) {
Expand Down Expand Up @@ -231,80 +213,6 @@ private Optional<RexNode> visitConcat(RexCall call) {
return Optional.of(rexBuilder.makeCall(op, castOperands));
}

private Optional<RexNode> visitFromUnixtime(RexCall call) {
List<RexNode> convertedOperands = visitList(call.getOperands(), (boolean[]) null);
SqlOperator formatDatetime = createSqlOperatorOfFunction("format_datetime", FunctionReturnTypes.STRING);
SqlOperator fromUnixtime = createSqlOperatorOfFunction("from_unixtime", explicit(TIMESTAMP));
if (convertedOperands.size() == 1) {
return Optional
.of(rexBuilder.makeCall(formatDatetime, rexBuilder.makeCall(fromUnixtime, call.getOperands().get(0)),
rexBuilder.makeLiteral("yyyy-MM-dd HH:mm:ss")));
} else if (convertedOperands.size() == 2) {
return Optional.of(rexBuilder.makeCall(formatDatetime,
rexBuilder.makeCall(fromUnixtime, call.getOperands().get(0)), call.getOperands().get(1)));
}
return Optional.empty();
}

private Optional<RexNode> visitFromUtcTimestampCall(RexCall call) {
RelDataType inputType = call.getOperands().get(0).getType();
// TODO(trinodb/trino#6295) support high-precision timestamp
RelDataType targetType = typeFactory.createSqlType(TIMESTAMP, 3);

List<RexNode> convertedOperands = visitList(call.getOperands(), (boolean[]) null);
RexNode sourceValue = convertedOperands.get(0);
RexNode timezone = convertedOperands.get(1);

// In below definitions we should use `TIMESTATMP WITH TIME ZONE`. As calcite is lacking
// this type we use `TIMESTAMP` instead. It does not have any practical implications as result syntax tree
// is not type-checked, and only used for generating output SQL for a view query.
SqlOperator trinoAtTimeZone =
createSqlOperatorOfFunction("at_timezone", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoWithTimeZone =
createSqlOperatorOfFunction("with_timezone", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoToUnixTime = createSqlOperatorOfFunction("to_unixtime", explicit(DOUBLE));
SqlOperator trinoFromUnixtimeNanos =
createSqlOperatorOfFunction("from_unixtime_nanos", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoFromUnixTime =
createSqlOperatorOfFunction("from_unixtime", explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoCanonicalizeHiveTimezoneId =
createSqlOperatorOfFunction("$canonicalize_hive_timezone_id", explicit(VARCHAR));

RelDataType bigintType = typeFactory.createSqlType(BIGINT);
RelDataType doubleType = typeFactory.createSqlType(DOUBLE);

if (inputType.getSqlTypeName() == BIGINT || inputType.getSqlTypeName() == INTEGER
|| inputType.getSqlTypeName() == SMALLINT || inputType.getSqlTypeName() == TINYINT) {

return Optional.of(rexBuilder.makeCast(targetType,
rexBuilder.makeCall(trinoAtTimeZone,
rexBuilder.makeCall(trinoFromUnixtimeNanos,
rexBuilder.makeCall(MULTIPLY, rexBuilder.makeCast(bigintType, sourceValue),
rexBuilder.makeBigintLiteral(BigDecimal.valueOf(1000000)))),
rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone))));
}

if (inputType.getSqlTypeName() == DOUBLE || inputType.getSqlTypeName() == FLOAT
|| inputType.getSqlTypeName() == DECIMAL) {

return Optional.of(rexBuilder.makeCast(targetType,
rexBuilder.makeCall(trinoAtTimeZone,
rexBuilder.makeCall(trinoFromUnixTime, rexBuilder.makeCast(doubleType, sourceValue)),
rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone))));
}

if (inputType.getSqlTypeName() == TIMESTAMP || inputType.getSqlTypeName() == DATE) {
return Optional.of(rexBuilder.makeCast(targetType,
rexBuilder.makeCall(trinoAtTimeZone,
rexBuilder.makeCall(trinoFromUnixTime,
rexBuilder.makeCall(trinoToUnixTime,
rexBuilder.makeCall(trinoWithTimeZone, sourceValue, rexBuilder.makeLiteral("UTC")))),
rexBuilder.makeCall(trinoCanonicalizeHiveTimezoneId, timezone))));
}

return Optional.empty();
}

// Hive allows passing in a byte array or String to substr/substring, so we can make an effort to emulate the
// behavior by casting non-String input to String
// https://cwiki.apache.org/confluence/display/hive/languagemanual+udf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import com.linkedin.coral.trino.rel2trino.transformers.CollectListOrSetFunctionTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.CoralRegistryOperatorRenameSqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.CurrentTimestampTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.FromUnixtimeOperatorTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.GenericCoralRegistryOperatorRenameSqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.JoinSqlCallTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.MapValueConstructorTransformer;
Expand Down Expand Up @@ -104,7 +105,7 @@ protected SqlCall transform(SqlCall sqlCall) {
+ "{\"op\": \"date\", \"operands\":[{\"op\": \"timestamp\", \"operands\":[{\"input\": 1}]}]}]",
null, null),
new ToDateOperatorTransformer(configs.getOrDefault(AVOID_TRANSFORM_TO_DATE_UDF, false)),
new CurrentTimestampTransformer(),
new CurrentTimestampTransformer(), new FromUnixtimeOperatorTransformer(),

// LinkedIn specific functions
new CoralRegistryOperatorRenameSqlCallTransformer(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
* Copyright 2022-2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.trino.rel2trino;

import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.util.SqlShuttle;
import org.apache.calcite.sql.validate.SqlValidator;

import com.linkedin.coral.common.HiveMetastoreClient;
import com.linkedin.coral.common.transformers.SqlCallTransformers;
import com.linkedin.coral.common.utils.TypeDerivationUtil;
import com.linkedin.coral.hive.hive2rel.HiveToRelConverter;
import com.linkedin.coral.trino.rel2trino.transformers.FromUtcTimestampOperatorTransformer;


/**
* DataTypeDerivedSqlCallConverter transforms the sqlCalls
* in the input SqlNode representation to be compatible with Trino engine.
* The transformation may involve change in operator, reordering the operands
* or even re-constructing the SqlNode.
*
* All the transformations performed as part of this shuttle require RelDataType derivation.
*/
public class DataTypeDerivedSqlCallConverter extends SqlShuttle {
private final SqlCallTransformers operatorTransformerList;

public DataTypeDerivedSqlCallConverter(HiveMetastoreClient mscClient, SqlNode topSqlNode) {
SqlValidator sqlValidator = new HiveToRelConverter(mscClient).getSqlValidator();
TypeDerivationUtil typeDerivationUtil = new TypeDerivationUtil(sqlValidator, topSqlNode);
operatorTransformerList = SqlCallTransformers.of(new FromUtcTimestampOperatorTransformer(typeDerivationUtil));
}

@Override
public SqlNode visit(final SqlCall call) {
return operatorTransformerList.apply((SqlCall) super.visit(call));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@
import static com.google.common.base.Preconditions.*;
import static com.linkedin.coral.trino.rel2trino.Calcite2TrinoUDFConverter.convertRel;
import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.*;
import static org.apache.calcite.sql.parser.SqlParserPos.*;


public class RelToTrinoConverter extends RelToSqlConverter {
Expand Down Expand Up @@ -88,7 +86,12 @@ public RelToTrinoConverter(HiveMetastoreClient mscClient, Map<String, Boolean> c
public String convert(RelNode relNode) {
RelNode rel = convertRel(relNode, configs);
SqlNode sqlNode = convertToSqlNode(rel);
SqlNode sqlNodeWithUDFOperatorConverted = sqlNode.accept(new CoralToTrinoSqlCallConverter(configs));

SqlNode sqlNodeWithRelDataTypeDerivedConversions =
sqlNode.accept(new DataTypeDerivedSqlCallConverter(_hiveMetastoreClient, sqlNode));

SqlNode sqlNodeWithUDFOperatorConverted =
sqlNodeWithRelDataTypeDerivedConversions.accept(new CoralToTrinoSqlCallConverter(configs));
return sqlNodeWithUDFOperatorConverted.accept(new TrinoSqlRewriter()).toSqlString(TrinoSqlDialect.INSTANCE)
.toString();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.trino.rel2trino.transformers;

import java.util.List;

import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.parser.SqlParserPos;

import com.linkedin.coral.common.functions.FunctionReturnTypes;
import com.linkedin.coral.common.transformers.SqlCallTransformer;

import static org.apache.calcite.sql.type.ReturnTypes.*;
import static org.apache.calcite.sql.type.SqlTypeName.*;


/**
* This transformer operates on SqlCalls with 'FROM_UNIXTIME(x)' Coral IR function
* and transforms it to trino engine compatible function - FORMAT_DATETIME(FROM_UNIXTIME(x)).
* For Example:
* A SqlCall of the form: "FROM_UNIXTIME(10000)" is transformed to
* "FORMAT_DATETIME(FROM_UNIXTIME(10000), 'yyyy-MM-dd HH:mm:ss')"
*/
public class FromUnixtimeOperatorTransformer extends SqlCallTransformer {

private static final String FORMAT_DATETIME = "format_datetime";
private static final String FROM_UNIXTIME = "from_unixtime";

@Override
protected boolean condition(SqlCall sqlCall) {
return sqlCall.getOperator().getName().equalsIgnoreCase(FROM_UNIXTIME);
}

@Override
protected SqlCall transform(SqlCall sqlCall) {
SqlOperator formatDatetimeOperator = createSqlOperator(FORMAT_DATETIME, FunctionReturnTypes.STRING);
SqlOperator fromUnixtimeOperator = createSqlOperator(FROM_UNIXTIME, explicit(TIMESTAMP));

List<SqlNode> operands = sqlCall.getOperandList();
if (operands.size() == 1) {
return formatDatetimeOperator.createCall(SqlParserPos.ZERO,
fromUnixtimeOperator.createCall(SqlParserPos.ZERO, operands.get(0)),
SqlLiteral.createCharString("yyyy-MM-dd HH:mm:ss", SqlParserPos.ZERO));
} else if (operands.size() == 2) {
return formatDatetimeOperator.createCall(SqlParserPos.ZERO,
fromUnixtimeOperator.createCall(SqlParserPos.ZERO, operands.get(0)), operands.get(1));
}
return sqlCall;
}
}
Loading

0 comments on commit e808370

Please sign in to comment.