diff --git a/src/main/scala/com/snowflake/snowpark/DataFrame.scala b/src/main/scala/com/snowflake/snowpark/DataFrame.scala
index abd9c77a..e602a3b9 100644
--- a/src/main/scala/com/snowflake/snowpark/DataFrame.scala
+++ b/src/main/scala/com/snowflake/snowpark/DataFrame.scala
@@ -1975,7 +1975,7 @@ class DataFrame private[snowpark] (
     val resultSchema = originalResult.schema
     val columnNames = resultSchema.map(_.name)
     // duplicated names
-    val dup = columnNames.diff(columnNames.distinct).distinct
+    val dup = columnNames.diff(columnNames.distinct).distinct.map(quoteName)
     // guarantee no duplicated names in the result
     if (dup.nonEmpty) {
       val dfPrefix = DataFrame.generatePrefix('o')
diff --git a/src/test/scala/com/snowflake/snowpark_test/TableFunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/TableFunctionSuite.scala
index 8adbcb67..fe330255 100644
--- a/src/test/scala/com/snowflake/snowpark_test/TableFunctionSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/TableFunctionSuite.scala
@@ -368,6 +368,22 @@ class TableFunctionSuite extends TestData {
 
   test("table function join with duplicated column name") {
     val df = Seq((1, "1,2"), (2, "3,4")).toDF("idx", "value")
-    df.show()
+    val result = df.join(tableFunctions.split_to_table(df("value"), lit(",")))
+    // only one VALUE in the result
+    checkAnswer(result.select("value"), Seq(Row("1"), Row("2"), Row("3"), Row("4")))
+    checkAnswer(result.select(result("value")), Seq(Row("1"), Row("2"), Row("3"), Row("4")))
+    checkAnswer(result.select(df("value")), Seq(Row("1,2"), Row("1,2"), Row("3,4"), Row("3,4")))
   }
+
+  test("table function select with duplicated column name") {
+    val df = Seq((1, "1,2"), (2, "3,4")).toDF("idx", "value")
+    val result1 = df.select(tableFunctions.split_to_table(df("value"), lit(",")))
+    checkAnswer(result1, Seq(Row(1, 1, "1"), Row(1, 2, "2"), Row(2, 1, "3"), Row(2, 2, "4")))
+    val result = df.select(df("value"), tableFunctions.split_to_table(df("value"), lit(",")))
+    // only one VALUE in the result
+    checkAnswer(result.select("value"), Seq(Row("1"), Row("2"), Row("3"), Row("4")))
+    checkAnswer(result.select(result("value")), Seq(Row("1"), Row("2"), Row("3"), Row("4")))
+    checkAnswer(result.select(df("value")), Seq(Row("1,2"), Row("1,2"), Row("3,4"), Row("3,4")))
+  }
+
 }