Skip to content

Commit

Permalink
added new functions,testcases
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-sjayabalan committed Sep 10, 2024
1 parent dd5e7d6 commit 0afcdb8
Show file tree
Hide file tree
Showing 4 changed files with 340 additions and 0 deletions.
130 changes: 130 additions & 0 deletions src/main/java/com/snowflake/snowpark_java/Functions.java
Original file line number Diff line number Diff line change
Expand Up @@ -4313,6 +4313,136 @@ public static Column from_unixtime(Column ut, String f) {
public static Column monotonically_increasing_id() {
  // Obtain the Scala-side column first, then wrap it in the Java Column type.
  com.snowflake.snowpark.Column scalaColumn =
      com.snowflake.snowpark.functions.monotonically_increasing_id();
  return new Column(scalaColumn);
}
/**
 * Computes the number of months between two date or timestamp columns.
 *
 * <p>A whole number is produced when both inputs share the same day of month or are both the
 * last day of their respective months. Otherwise the fractional part is calculated assuming 31
 * days per month.
 *
 * <p>This method converts both arguments to their Scala counterparts and forwards them, in the
 * same order, to {@code com.snowflake.snowpark.functions.months_between}.
 *
 * <p>Usage sketch:
 *
 * <pre>{@code
 * DataFrame result =
 *     df.select(Functions.months_between(df.col("end_date"), df.col("start_date")));
 * }</pre>
 *
 * @param end A date, timestamp or string column. If a string, the data must be in a format that
 *     can be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param start A date, timestamp or string column. If a string, the data must be in a format
 *     that can be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return A column holding the month difference as calculated by the Scala implementation
 * @since 1.15.0
 */
public static Column months_between(Column end, Column start) {
  // NOTE(review): the sign of the result depends on the order in which the Scala
  // implementation hands these two columns to MONTHS_BETWEEN -- confirm it matches the
  // end/start naming used here.
  com.snowflake.snowpark.Column scalaEnd = end.toScalaColumn();
  com.snowflake.snowpark.Column scalaStart = start.toScalaColumn();
  return new Column(com.snowflake.snowpark.functions.months_between(scalaEnd, scalaStart));
}

/**
 * Finds the position of the first match of {@code substring} within the values of {@code str}.
 *
 * <p>The lookup is delegated to the Scala {@code instr}, which runs Snowflake's {@code
 * REGEXP_INSTR}. As a consequence {@code substring} is interpreted as a regular expression, not
 * as a literal string, so regex metacharacters in it must be escaped by the caller.
 *
 * <p>Positions are 1-based; 0 is returned when nothing matches.
 *
 * <p>Example:
 *
 * <pre>{@code
 * DataFrame df =
 *     session.sql("select * from values('It was the best of times') as t(a)");
 * df.select(Functions.instr(df.col("a"), "was")); // yields 4
 * }</pre>
 *
 * @param str Column on which instr has to be applied
 * @param substring Pattern to look for
 * @return A column with the 1-based match position; null if either of the arguments is null
 * @since 1.15.0
 */
public static Column instr(Column str, String substring) {
  com.snowflake.snowpark.Column haystack = str.toScalaColumn();
  return new Column(com.snowflake.snowpark.functions.instr(haystack, substring));
}

/**
 * Converts {@code ts} to a timestamp with time zone.
 *
 * <p>The column is forwarded to the Scala {@code from_utc_timestamp}, which applies Snowflake's
 * {@code TO_TIMESTAMP_TZ} to it. This method takes no time-zone argument.
 *
 * <p>For example:
 *
 * <pre>{@code
 * ALTER SESSION SET TIMEZONE = 'America/Los_Angeles';
 * SELECT TO_TIMESTAMP_TZ('2024-04-05 01:02:03');
 * +----------------------------------------+
 * | TO_TIMESTAMP_TZ('2024-04-05 01:02:03') |
 * |----------------------------------------|
 * | 2024-04-05 01:02:03.000 -0700          |
 * +----------------------------------------+
 * }</pre>
 *
 * @param ts A date, timestamp or string column. If a string, the data must be in a format that
 *     can be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return A timestamp column, or null if `ts` was a string that could not be cast to a timestamp
 * @since 1.15.0
 */
public static Column from_utc_timestamp(Column ts) {
  // NOTE(review): the name suggests a UTC-to-target-zone conversion, but no target zone can be
  // supplied and the Scala side only issues TO_TIMESTAMP_TZ -- confirm the intended semantics.
  com.snowflake.snowpark.Column scalaTs = ts.toScalaColumn();
  return new Column(com.snowflake.snowpark.functions.from_utc_timestamp(scalaTs));
}

/**
 * Converts {@code ts} to a timestamp with time zone.
 *
 * <p>The column is forwarded to the Scala {@code to_utc_timestamp}, which applies Snowflake's
 * {@code TO_TIMESTAMP_TZ} to it. This method takes no time-zone argument.
 *
 * @param ts A date, timestamp or string column. If a string, the data must be in a format that
 *     can be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return A timestamp column, or null if `ts` was a string that could not be cast to a timestamp
 * @since 1.15.0
 */
public static Column to_utc_timestamp(Column ts) {
  // NOTE(review): the Scala side issues the same TO_TIMESTAMP_TZ builtin for this method and
  // for from_utc_timestamp, and no source zone can be supplied -- confirm the intended
  // semantics.
  com.snowflake.snowpark.Column scalaTs = ts.toScalaColumn();
  return new Column(com.snowflake.snowpark.functions.to_utc_timestamp(scalaTs));
}

/**
 * Formats numeric column {@code x} as a string in a format like '#,###,###.##' with {@code d}
 * decimal places.
 *
 * <p>If {@code d} is 0, the result has no decimal point or fractional part. If {@code d} is less
 * than 0, the result will be null.
 *
 * @param x numeric column to be transformed
 * @param d number of decimal places in the formatted output; must not be null
 * @return Number cast to the specific string format
 * @since 1.15.0
 */
public static Column format_number(Column x, Integer d) {
  com.snowflake.snowpark.Column scalaX = x.toScalaColumn();
  // The Scala API declares d as Int, so the Integer is unboxed here; a null d fails with a
  // NullPointerException at this call.
  return new Column(com.snowflake.snowpark.functions.format_number(scalaX, d));
}

/* Returns a Column expression with values sorted in descending order.
*
Expand Down
108 changes: 108 additions & 0 deletions src/main/scala/com/snowflake/snowpark/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3591,6 +3591,114 @@ object functions {
*/
def monotonically_increasing_id(): Column = {
  // The seq8 builtin is invoked with an empty argument list.
  val seq8 = builtin("seq8")
  seq8()
}

/**
 * Returns the number of months between dates `start` and `end`, computed as `end - start`.
 *
 * A whole number is returned if both inputs have the same day of month or both are the last day
 * of their respective months. Otherwise, the difference is calculated assuming 31 days per month.
 *
 * For example:
 * {{{
 *   // end_date = 2010-08-02, start_date = 2010-07-02  => 1.0
 *   // end_date = 2020-12-02, start_date = 2020-08-02  => 4.0
 *   df.select(months_between(col("end_date"), col("start_date")))
 * }}}
 *
 * @since 1.15.0
 * @param end   A date, timestamp or string. If a string, the data must be in a format that can
 *              be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @param start A date, timestamp or string. If a string, the data must be in a format that can
 *              be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return A double, or null if either `end` or `start` were strings that could not be cast to a
 *         timestamp. Negative if `end` is before `start`
 */
def months_between(end: Column, start: Column): Column =
  // Snowflake's MONTHS_BETWEEN(d1, d2) evaluates d1 - d2, so `end` has to be the first
  // argument for the result to be positive when `end` is later than `start`.
  builtin("MONTHS_BETWEEN")(end, start)

/**
 * Finds the position of the first match of `substring` within the values of `str`.
 *
 * The lookup runs Snowflake's `REGEXP_INSTR`, so `substring` is interpreted as a regular
 * expression rather than as a literal string.
 *
 * For example:
 * {{{
 *   val df = Seq("It was the best of times, it was the worst of times").toDF("a")
 *   df.select(instr(col("a"), "was"))  // yields 4
 * }}}
 *
 * @since 1.15.0
 * @note The position is not zero based, but 1 based index. Returns 0 if substr
 *       could not be found in str.
 * @param str       Column on which instr has to be applied
 * @param substring Pattern to look for
 * @return A column with the match position; null if either of the arguments is null
 */
def instr(str: Column, substring: String): Column = {
  val regexpInstr = builtin("REGEXP_INSTR")
  regexpInstr(str, substring)
}

/**
 * Converts `ts` to a timestamp with time zone by applying Snowflake's `TO_TIMESTAMP_TZ`.
 * This function takes no time-zone argument.
 *
 * For example:
 * {{{
 *   ALTER SESSION SET TIMEZONE = 'America/Los_Angeles';
 *   SELECT TO_TIMESTAMP_TZ('2024-04-05 01:02:03');
 *   +----------------------------------------+
 *   | TO_TIMESTAMP_TZ('2024-04-05 01:02:03') |
 *   |----------------------------------------|
 *   | 2024-04-05 01:02:03.000 -0700          |
 *   +----------------------------------------+
 * }}}
 *
 * @since 1.15.0
 * @param ts A date, timestamp or string. If a string, the data must be in a format that can be
 *           cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return A timestamp, or null if `ts` was a string that could not be cast to a timestamp
 */
def from_utc_timestamp(ts: Column): Column = {
  // NOTE(review): the name suggests a UTC-to-target-zone conversion, but no target zone can
  // be supplied and only TO_TIMESTAMP_TZ is issued -- confirm the intended semantics.
  val toTimestampTz = builtin("TO_TIMESTAMP_TZ")
  toTimestampTz(ts)
}

/**
 * Converts `ts` to a timestamp with time zone by applying Snowflake's `TO_TIMESTAMP_TZ`.
 * This function takes no time-zone argument.
 *
 * @since 1.15.0
 * @param ts A date, timestamp or string. If a string, the data must be in a format that can be
 *           cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return A timestamp, or null if `ts` was a string that could not be cast to a timestamp
 */
def to_utc_timestamp(ts: Column): Column = {
  // NOTE(review): this issues the same TO_TIMESTAMP_TZ builtin as from_utc_timestamp and no
  // source zone can be supplied -- confirm the intended semantics.
  val toTimestampTz = builtin("TO_TIMESTAMP_TZ")
  toTimestampTz(ts)
}

/**
 * Formats numeric column x as a string with thousands separators and d decimal places, using
 * `TO_VARCHAR` with a format model of `999,999` followed, when d is positive, by a decimal
 * point and d zeros.
 *
 * If d is 0, the result has no decimal point or fractional part.
 * If d is less than 0, the result will be null.
 *
 * NOTE(review): earlier documentation stated HALF_EVEN rounding; the rounding actually applied
 * is whatever `TO_VARCHAR` does -- confirm. The format model has six integer digit positions;
 * how values needing more digits are rendered is not visible here -- confirm.
 *
 * @since 1.15.0
 * @param x numeric column to be transformed
 * @param d Amount of decimal for the number format
 * @return Number cast to the specific string format
 */
def format_number(x: Column, d: Int): Column =
  d match {
    // A negative number of decimals yields a null literal column.
    case negative if negative < 0 => lit(null)
    // No decimals requested: integer-only format model.
    case 0 => builtin("TO_VARCHAR")(x, "999,999")
    // One '0' placeholder per requested decimal place.
    case places => builtin("TO_VARCHAR")(x, s"999,999.${"0" * places}")
  }
/* Returns a Column expression with values sorted in descending order.
* Example:
* {{{
Expand Down
51 changes: 51 additions & 0 deletions src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
Original file line number Diff line number Diff line change
Expand Up @@ -3100,4 +3100,55 @@ public void unhex() {
Row[] expected = {Row.create("1"), Row.create("2"), Row.create("3")};
checkAnswer(df.select(Functions.unhex(Functions.col("a"))), expected, false);
}

@Test
public void months_between() {
  String query =
      "select * from values('2010-07-02'::Date,'2010-08-02'::Date), "
          + "('2020-08-02'::Date,'2020-12-02'::Date) as t(a,b)";
  DataFrame df = getSession().sql(query);
  // NOTE(review): column a (passed as `end`) is earlier than column b (passed as `start`).
  // The documented contract ("negative if end is before start") would predict negative
  // values; the positive expectations below match the argument order currently handed to
  // MONTHS_BETWEEN -- revisit if that order changes.
  DataFrame monthsApart = df.select(Functions.months_between(df.col("a"), df.col("b")));
  Row[] expected = {Row.create(1.000000), Row.create(4.000000)};
  checkAnswer(monthsApart, expected, false);
}

@Test
public void instr() {
  String query =
      "select * from values('It was the best of times, it was the worst of times') as t(a)";
  DataFrame df = getSession().sql(query);
  // The first "was" starts at the fourth character of the sentence (1-based).
  DataFrame positions = df.select(Functions.instr(df.col("a"), "was"));
  Row[] expected = {Row.create(4)};
  checkAnswer(positions, expected, false);
}

@Test
public void format_number1() {
  DataFrame df = getSession().sql("select * from values(1),(2),(3) as t(a)");
  // Zero decimals: ltrim removes any leading whitespace from the formatted value before the
  // comparison.
  DataFrame formatted =
      df.select(Functions.ltrim(Functions.format_number(df.col("a"), 0)));
  Row[] expected = {Row.create("1"), Row.create("2"), Row.create("3")};
  checkAnswer(formatted, expected, false);
}

@Test
public void format_number2() {
  DataFrame df = getSession().sql("select * from values(1),(2),(3) as t(a)");
  // Two decimals: ltrim removes any leading whitespace from the formatted value before the
  // comparison.
  DataFrame formatted =
      df.select(Functions.ltrim(Functions.format_number(df.col("a"), 2)));
  Row[] expected = {Row.create("1.00"), Row.create("2.00"), Row.create("3.00")};
  checkAnswer(formatted, expected, false);
}

@Test
public void from_utc_timestamp() {
  DataFrame df = getSession().sql("select * from values('2024-04-05 01:02:03') as t(a)");
  // The expected timestamp carries the same wall-clock value as the input string.
  DataFrame converted = df.select(Functions.from_utc_timestamp(df.col("a")));
  Row[] expected = {Row.create(Timestamp.valueOf("2024-04-05 01:02:03.0"))};
  checkAnswer(converted, expected, false);
}

@Test
public void to_utc_timestamp() {
  DataFrame df = getSession().sql("select * from values('2024-04-05 01:02:03') as t(a)");
  // The expected timestamp carries the same wall-clock value as the input string.
  DataFrame converted = df.select(Functions.to_utc_timestamp(df.col("a")));
  Row[] expected = {Row.create(Timestamp.valueOf("2024-04-05 01:02:03.0"))};
  checkAnswer(converted, expected, false);
}
}
51 changes: 51 additions & 0 deletions src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2451,6 +2451,57 @@ trait FunctionSuite extends TestData {
Seq(Row("1"), Row("2"), Row("3")),
sort = false)
}
test("months_between") {
  val dates = Seq(
    (Date.valueOf("2010-07-02"), Date.valueOf("2010-08-02")),
    (Date.valueOf("2020-08-02"), Date.valueOf("2020-12-02"))).toDF("a", "b")
  // NOTE(review): column a (passed as `end`) is earlier than column b (passed as `start`).
  // The documented contract ("negative if end is before start") would predict negative
  // values; the positive expectations below match the argument order currently handed to
  // MONTHS_BETWEEN -- revisit if that order changes.
  val monthsApart = dates.select(months_between(col("a"), col("b")))
  checkAnswer(monthsApart, Seq(Row(1.0), Row(4.0)), sort = false)
}

test("instr") {
  val sentence = Seq("It was the best of times, it was the worst of times").toDF("a")
  // The first "was" starts at the fourth character of the sentence (1-based).
  val positions = sentence.select(instr(col("a"), "was"))
  checkAnswer(positions, Seq(Row(4)), sort = false)
}

test("format_number1") {
  // Zero decimals; ltrim removes any leading whitespace from the formatted value.
  val formatted = number3.select(ltrim(format_number(col("a"), 0)))
  val expected = Seq(Row("1"), Row("2"), Row("3"))
  checkAnswer(formatted, expected, sort = false)
}

test("format_number2") {
  // Two decimals; ltrim removes any leading whitespace from the formatted value.
  val formatted = number3.select(ltrim(format_number(col("a"), 2)))
  val expected = Seq(Row("1.00"), Row("2.00"), Row("3.00"))
  checkAnswer(formatted, expected, sort = false)
}

test("format_number3") {
  // A negative number of decimals is expected to produce null for every row.
  val formatted = number3.select(ltrim(format_number(col("a"), -1)))
  val expected = Seq(Row(null), Row(null), Row(null))
  checkAnswer(formatted, expected, sort = false)
}

test("from_utc_timestamp") {
  // The expected timestamp carries the same wall-clock value as the input string.
  val expected = Seq(Timestamp.valueOf("2024-04-05 01:02:03.0")).toDF("a")
  val input = Seq("2024-04-05 01:02:03").toDF("a")
  val converted = input.select(from_utc_timestamp(col("a")))
  checkAnswer(converted, expected, sort = false)
}

test("to_utc_timestamp") {
  // The expected timestamp carries the same wall-clock value as the input string.
  val expected = Seq(Timestamp.valueOf("2024-04-05 01:02:03.0")).toDF("a")
  val input = Seq("2024-04-05 01:02:03").toDF("a")
  val converted = input.select(to_utc_timestamp(col("a")))
  checkAnswer(converted, expected, sort = false)
}

}

Expand Down

0 comments on commit 0afcdb8

Please sign in to comment.