Skip to content

Commit

Permalink
Added type_cast
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-sjayabalan committed Aug 2, 2024
1 parent 7374b82 commit 9f7a385
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 35 deletions.
45 changes: 24 additions & 21 deletions src/main/scala/com/snowflake/snowpark/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package com.snowflake.snowpark

import com.snowflake.snowpark.internal.analyzer._
import com.snowflake.snowpark.internal.ScalaFunctions._
import com.snowflake.snowpark.types.DataType
import com.snowflake.snowpark.types._
import com.snowflake.snowpark.internal.{
ErrorMessage,
OpenTelemetry,
Expand Down Expand Up @@ -3140,7 +3140,8 @@ object functions {
* @group agg_func
*/
def listagg(col: Column): Column = {
  // Convenience form: forwards to the three-argument overload with an empty string as the
  // second argument and isDistinct = false.
  listagg(col, "", isDistinct = false)
}
/**

/**
* This leverages JSON_EXTRACT_PATH_TEXT and improves functionality by allowing multiple columns
* in a single call, whereas JSON_EXTRACT_PATH_TEXT must be called once for every column.
*
Expand All @@ -3150,12 +3151,12 @@ object functions {
* <li> Identifiers with spaces: Snowflake returns error when an invalid expression is sent. </li>
*
* Usage:
* {
* df = session.createDataFrame(Seq(("CR", "{\"id\": 5,
* {
* df = session.createDataFrame(Seq(("CR", "{\"id\": 5,
* \"name\": \"Jose\", \"age\": 29}")))
* .toDF(Seq("nationality", "json_string"))
* .toDF(Seq("nationality", "json_string"))
* }
* When the result of this function is the only part of
* When the result of this function is the only part of
* the select statement, no changes are needed
* df.select(json_tuple(col("json_string"), "id", "name", "age")).show()
*
Expand Down Expand Up @@ -3217,11 +3218,11 @@ object functions {
/**
* This function converts a JSON string to a variant in Snowflake.
*
* In Snowflake the values are converted automatically, however they're converted as variants,
* meaning that the printSchema
* In Snowflake the values are converted automatically, however they're converted as variants,
* meaning that the printSchema
* function would return different datatypes.
* To convert the datatype and it to be printed as the expected datatype,
* it should be read on the
* To convert the datatype and it to be printed as the expected datatype,
* it should be read on the
* selectExpr function as "json['relative']['age']::integer"
* val data_for_json = Seq(
* (1, "{\"id\": 172319, \"age\": 41, \"relative\": {\"id\": 885471, \"age\": 29}}")
Expand Down Expand Up @@ -3257,28 +3258,29 @@ object functions {
def from_json(e: Column): Column =
  // Hand the column straight to the TRY_PARSE_JSON built-in; no other processing happens here.
  builtin("TRY_PARSE_JSON")(e)
/**
 * Returns the value of the source column cast to the data type targetType.
 *
 * Intended as a special version of CAST for a subset of data type conversions: it performs
 * the same operation (i.e. converts a value of one data type into another data type), and
 * is meant to return a NULL value instead of raising an error when the conversion can not
 * be performed. The column argument must be a string column in Snowflake.
 *
 * NOTE(review): this delegates to Column.cast, which presumably emits a plain CAST, so a
 * failed conversion may raise instead of yielding NULL. Confirm, and switch to a dedicated
 * TRY_CAST expression if one is available.
 *
 * @since 1.12.1
 * @param e Any castable expression
 * @param targetType The type of the result
 * @return A column whose values are of type targetType.
 */
def try_cast(e: Column, targetType: DataType): Column = {
  // Cast the column that was actually passed in. The previous body called
  // try_cast(col("e"), targetType), which ignored the argument, referred to a column
  // literally named "e", and re-entered this same overload without ever terminating.
  e.cast(targetType)
}

/**
 * This function receives a date or timestamp, or a
 * properly formatted string, and subtracts the specified
 * number of days from it. If it receives a string, the string is
 * cast to a date using try_cast and, if the cast is not possible,
 * null is returned. If it receives
 * a timestamp, it is cast to a date (removing its time).
Expand All @@ -3288,8 +3290,9 @@ object functions {
* @return Column object.
*/
def date_sub(start: Column, days: Int): Column = {
  // Use the caller's column itself: col("start") would instead look up a column that is
  // literally named "start" in the DataFrame, ignoring the argument.
  val startAsDate = try_cast(start, DateType)
  // Subtraction is expressed as adding the negated day count.
  dateadd("DAY", lit(days * -1), startAsDate)
}

/**
* Invokes a built-in snowflake function with the specified name and arguments.
* Arguments can be of two types
Expand Down Expand Up @@ -4017,4 +4020,4 @@ object functions {
"")(func)
}

}
}
3 changes: 2 additions & 1 deletion src/test/scala/com/snowflake/snowpark/SNTestBase.scala
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,8 @@ trait SNTestBase extends FunSuite with BeforeAndAfterAll with SFTestUtils with S
("FORCE_ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT", "true"),
("ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT", "true"),
("ENABLE_STRUCTURED_TYPES_IN_BINDS", "enable")),
currentSession, skipPreprod = true)(thunk)
currentSession,
skipPreprod = true)(thunk)
// disable these tests on preprod daily tests until these parameters are enabled by default.
}
}
Expand Down
24 changes: 11 additions & 13 deletions src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2183,43 +2183,41 @@ trait FunctionSuite extends TestData {
Seq(Row(1.0), Row(1.25992104989)),
sort = false)
}
test("from_json") {
  // The expected rows are built once and never reassigned, so a val is sufficient.
  val expected = Seq(("21", "Joe", "21021"), ("26", "Jay", "94021")).toDF("age", "name", "zip")
  checkAnswer(
    object2
      .select(from_json(col("obj"))),
    expected,
    sort = false)
}
test("json_tuple") {
  // The expected rows are built once and never reassigned, so a val is sufficient.
  val expected = Seq(("21", "Joe"), ("26", "Jay")).toDF("age", "name")
  checkAnswer(
    object2
      .select(json_tuple(col("obj"), "age", "name")),
    expected,
    sort = false)
}

test("date_sub") {
  // The expected rows are built once and never reassigned, so a val is sufficient.
  val expected = Seq("2020-04-30 13:11:20.000", "2020-08-20 01:30:05.000").toDF("b")
  checkAnswer(
    timestamp1
      // date_sub takes the day count as a plain Int, not as a Column literal.
      .select(date_sub(col("a"), 1)),
    expected,
    sort = false)
}

test("try_cast") {
  // The expected rows are built once and never reassigned, so a val is sufficient.
  val expected = Seq("2020-08-01", "2010-12-01").toDF("b")
  checkAnswer(
    date1
      // try_cast takes the target type as a second argument (a DataType value).
      .select(try_cast(col("a"), StringType)),
    expected,
    sort = false)
}

}

class EagerFunctionSuite extends FunctionSuite with EagerSession
Expand Down

0 comments on commit 9f7a385

Please sign in to comment.