For example: * - * {{{ - * val df2 = df.alias("A") - * df2.select(df2.col("A.num")) - * }}} + *
{{{ val df2 = df.alias("A") df2.select(df2.col("A.num")) }}} * - * @group basic * @since 1.10.0 * @param alias The alias name of the dataframe * @return a [[DataFrame]] diff --git a/src/main/java/com/snowflake/snowpark_java/DataFrameWriter.java b/src/main/java/com/snowflake/snowpark_java/DataFrameWriter.java index 4978e5a6..fb48678a 100644 --- a/src/main/java/com/snowflake/snowpark_java/DataFrameWriter.java +++ b/src/main/java/com/snowflake/snowpark_java/DataFrameWriter.java @@ -61,7 +61,7 @@ public class DataFrameWriter { /** * Sets the specified option in the DataFrameWriter. * - *
Sets the specified option for saving data to a table * *
Use this method to configure options: * @@ -70,7 +70,7 @@ public class DataFrameWriter { * and the target table exists. * * - *
Use this method to configure options: * @@ -117,7 +117,7 @@ public DataFrameWriter option(String key, Object value) { /** * Sets multiple specified options in the DataFrameWriter. * - *
Sets the specified option for saving data to a table * *
Use this method to configure options: * @@ -126,7 +126,7 @@ public DataFrameWriter option(String key, Object value) { * and the target table exists. * * - *
Use this method to configure options: * diff --git a/src/main/scala/com/snowflake/snowpark/internal/ErrorMessage.scala b/src/main/scala/com/snowflake/snowpark/internal/ErrorMessage.scala index fbb7966c..a88ebb35 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/ErrorMessage.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/ErrorMessage.scala @@ -253,7 +253,8 @@ private[snowpark] object ErrorMessage { def DF_MORE_THAN_ONE_TF_IN_SELECT(): SnowparkClientException = createException("0131") - def DF_ALIAS_DUPLICATES(duplicatedAlias: scala.collection.Set[String]): SnowparkClientException = + def DF_ALIAS_DUPLICATES( + duplicatedAlias: scala.collection.Set[String]): SnowparkClientException = createException("0132", duplicatedAlias.mkString(", ")) /* diff --git a/src/main/scala/com/snowflake/snowpark/internal/Utils.scala b/src/main/scala/com/snowflake/snowpark/internal/Utils.scala index 92c8173d..c9d23feb 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/Utils.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/Utils.scala @@ -1,7 +1,12 @@ package com.snowflake.snowpark.internal import com.snowflake.snowpark.Column -import com.snowflake.snowpark.internal.analyzer.{Attribute, LogicalPlan, TableFunctionExpression, singleQuote} +import com.snowflake.snowpark.internal.analyzer.{ + Attribute, + LogicalPlan, + TableFunctionExpression, + singleQuote +} import java.io.{File, FileInputStream} import java.lang.invoke.SerializedLambda @@ -15,7 +20,7 @@ import scala.collection.mutable.ArrayBuffer import scala.util.Random object Utils extends Logging { - val Version: String = "1.10.0-SNAPSHOT" + val Version: String = "1.11.0-SNAPSHOT" // Package name of snowpark on server side val SnowparkPackageName = "com.snowflake:snowpark" val PackageNameDelimiter = ":" @@ -99,8 +104,9 @@ object Utils extends Logging { lastInternalLine + "\n" + stackTrace.take(stackDepth).mkString("\n") } - def addToDataframeAliasMap(result: Map[String, Seq[Attribute]], child: LogicalPlan) - : Map[String, Seq[Attribute]] = { + def addToDataframeAliasMap( + result: Map[String, Seq[Attribute]], + child: LogicalPlan): Map[String, Seq[Attribute]] = { if (child != null) { val map = child.dfAliasMap val duplicatedAlias = result.keySet.intersect(map.keySet) diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/Expression.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/Expression.scala index adbddd38..5d11d554 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/Expression.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/Expression.scala @@ -391,7 +391,8 @@ private[snowpark] case class UnresolvedAttribute(override val name: String) } private[snowpark] case class UnresolvedDFAliasAttribute(override val name: String) - extends Expression with NamedExpression { + extends Expression + with NamedExpression { override def sql: String = "" override def children: Seq[Expression] = Seq.empty diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/ExpressionAnalyzer.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/ExpressionAnalyzer.scala index 76bed5ef..826b4703 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/ExpressionAnalyzer.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/ExpressionAnalyzer.scala @@ -5,16 +5,19 @@ import com.snowflake.snowpark.internal.ErrorMessage import scala.collection.mutable.{Map => MMap} private[snowpark] object ExpressionAnalyzer { - def apply(aliasMap: Map[ExprId, String], - dfAliasMap: Map[String, Seq[Attribute]]): ExpressionAnalyzer = + def apply( + aliasMap: Map[ExprId, String], + dfAliasMap: Map[String, Seq[Attribute]]): ExpressionAnalyzer = new ExpressionAnalyzer(aliasMap, dfAliasMap) def apply(): ExpressionAnalyzer = new ExpressionAnalyzer(Map.empty, Map.empty) // create new analyzer by combining two alias maps - def apply(map1: Map[ExprId, String], map2: Map[ExprId, String], - dfAliasMap: Map[String, Seq[Attribute]]): ExpressionAnalyzer = { + def apply( + map1: Map[ExprId, String], + map2: Map[ExprId, String], + dfAliasMap: Map[String, Seq[Attribute]]): ExpressionAnalyzer = { val common = map1.keySet & map2.keySet val result = (map1 ++ map2).filter { // remove common column, let (df1.join(df2)) @@ -24,16 +27,18 @@ private[snowpark] object ExpressionAnalyzer { new ExpressionAnalyzer(result, dfAliasMap) } - def apply(maps: Seq[Map[ExprId, String]], - dfAliasMap: Map[String, Seq[Attribute]]): ExpressionAnalyzer = { + def apply( + maps: Seq[Map[ExprId, String]], + dfAliasMap: Map[String, Seq[Attribute]]): ExpressionAnalyzer = { maps.foldLeft(ExpressionAnalyzer()) { case (expAnalyzer, map) => ExpressionAnalyzer(expAnalyzer.getAliasMap, map, dfAliasMap) } } } -private[snowpark] class ExpressionAnalyzer(aliasMap: Map[ExprId, String], - dfAliasMap: Map[String, Seq[Attribute]]) { +private[snowpark] class ExpressionAnalyzer( + aliasMap: Map[ExprId, String], + dfAliasMap: Map[String, Seq[Attribute]]) { private val generatedAliasMap: MMap[ExprId, String] = MMap.empty def analyze(ex: Expression): Expression = ex match { diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/MultiChildrenNode.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/MultiChildrenNode.scala index b5594184..7b7b863a 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/MultiChildrenNode.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/MultiChildrenNode.scala @@ -13,7 +13,6 @@ private[snowpark] trait MultiChildrenNode extends LogicalPlan { protected def updateChildren(newChildren: Seq[LogicalPlan]): MultiChildrenNode - override lazy val dfAliasMap: Map[String, Seq[Attribute]] = children.foldLeft(Map.empty[String, Seq[Attribute]]) { case (map, child) => Utils.addToDataframeAliasMap(map, child) diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlanNode.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlanNode.scala index 3cebd228..961cea4f 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlanNode.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/SnowflakePlanNode.scala @@ -195,8 +195,7 @@ private[snowpark] case class Sort(order: Seq[SortOrder], child: LogicalPlan) ext Sort(order, _) } -private[snowpark] case class DataframeAlias(alias: String, child: LogicalPlan) - extends UnaryNode { +private[snowpark] case class DataframeAlias(alias: String, child: LogicalPlan) extends UnaryNode { override lazy val dfAliasMap: Map[String, Seq[Attribute]] = Utils.addToDataframeAliasMap(Map(alias -> child.getSnowflakePlan.get.output), child) diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/binaryPlanNodes.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/binaryPlanNodes.scala index 98ff26d5..67002153 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/binaryPlanNodes.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/binaryPlanNodes.scala @@ -15,7 +15,6 @@ private[snowpark] abstract class BinaryNode extends LogicalPlan { lazy override protected val analyzer: ExpressionAnalyzer = ExpressionAnalyzer(left.aliasMap, right.aliasMap, dfAliasMap) - override lazy val dfAliasMap: Map[String, Seq[Attribute]] = Utils.addToDataframeAliasMap(Utils.addToDataframeAliasMap(Map.empty, left), right) diff --git a/src/main/scala/com/snowflake/snowpark/internal/analyzer/unaryExpressions.scala b/src/main/scala/com/snowflake/snowpark/internal/analyzer/unaryExpressions.scala index 5db1bfef..bf5db817 100644 --- a/src/main/scala/com/snowflake/snowpark/internal/analyzer/unaryExpressions.scala +++ b/src/main/scala/com/snowflake/snowpark/internal/analyzer/unaryExpressions.scala @@ -76,7 +76,7 @@ private[snowpark] case class Alias(child: Expression, name: String, isInternal: } private[snowpark] case class DfAlias(child: Expression, name: String) - extends UnaryExpression + extends UnaryExpression with NamedExpression { override def sqlOperator: String = "" override def operatorFirst: Boolean = false diff --git a/src/test/scala/com/snowflake/snowpark_test/DataFrameAliasSuite.scala b/src/test/scala/com/snowflake/snowpark_test/DataFrameAliasSuite.scala index 5deca2c9..044bc270 100644 --- a/src/test/scala/com/snowflake/snowpark_test/DataFrameAliasSuite.scala +++ b/src/test/scala/com/snowflake/snowpark_test/DataFrameAliasSuite.scala @@ -54,15 +54,27 @@ class DataFrameAliasSuite extends TestData with BeforeAndAfterEach with EagerSes runQuery(s"insert into $tableName2 values(1, 7),(2, 8),(3, 9)", session) val df1 = session.table(tableName1).alias("A") val df2 = session.table(tableName2).alias("B") - checkAnswer(df1.join(df2, $"id1" === $"id2") - .select(df1.col("A.num1")), Seq(Row(4), Row(5), Row(6))) - checkAnswer(df1.join(df2, $"id1" === $"id2") - .select(df2.col("B.num2")), Seq(Row(7), Row(8), Row(9))) + checkAnswer( + df1 + .join(df2, $"id1" === $"id2") + .select(df1.col("A.num1")), + Seq(Row(4), Row(5), Row(6))) + checkAnswer( + df1 + .join(df2, $"id1" === $"id2") + .select(df2.col("B.num2")), + Seq(Row(7), Row(8), Row(9))) - checkAnswer(df1.join(df2, $"id1" === $"id2") - .select($"A.num1"), Seq(Row(4), Row(5), Row(6))) - checkAnswer(df1.join(df2, $"id1" === $"id2") - .select($"B.num2"), Seq(Row(7), Row(8), Row(9))) + checkAnswer( + df1 + .join(df2, $"id1" === $"id2") + .select($"A.num1"), + Seq(Row(4), Row(5), Row(6))) + checkAnswer( + df1 + .join(df2, $"id1" === $"id2") + .select($"B.num2"), + Seq(Row(7), Row(8), Row(9))) } test("Test for alias with join with column renaming") { @@ -72,16 +84,23 @@ class DataFrameAliasSuite extends TestData with BeforeAndAfterEach with EagerSes runQuery(s"insert into $tableName2 values(1, 7),(2, 8),(3, 9)", session) val df1 = session.table(tableName1).alias("A") val df2 = session.table(tableName2).alias("B") - checkAnswer(df1.join(df2, df1.col("id") === df2.col("id")) - .select(df1.col("A.num")), Seq(Row(4), Row(5), Row(6))) - checkAnswer(df1.join(df2, df1.col("id") === df2.col("id")) - .select(df2.col("B.num")), Seq(Row(7), Row(8), Row(9))) + checkAnswer( + df1 + .join(df2, df1.col("id") === df2.col("id")) + .select(df1.col("A.num")), + Seq(Row(4), Row(5), Row(6))) + checkAnswer( + df1 + .join(df2, df1.col("id") === df2.col("id")) + .select(df2.col("B.num")), + Seq(Row(7), Row(8), Row(9))) // The following use case is out of the scope of supporting alias // We still follow the old ambiguity resolving policy and require DF to be used assertThrows[SnowparkClientException]( - df1.join(df2, df1.col("id") === df2.col("id")) - .select($"A.num")) + df1 + .join(df2, df1.col("id") === df2.col("id")) + .select($"A.num")) } test("Test for alias conflict") { @@ -90,7 +109,8 @@ class DataFrameAliasSuite extends TestData with BeforeAndAfterEach with EagerSes val df1 = session.table(tableName1).alias("A") val df2 = session.table(tableName2).alias("A") assertThrows[SnowparkClientException]( - df1.join(df2, df1.col("id") === df2.col("id")) - .select(df1.col("A.num"))) + df1 + .join(df2, df1.col("id") === df2.col("id")) + .select(df1.col("A.num"))) } }