Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNOW-802269-months_between_format_number #159

Merged
merged 7 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions src/main/java/com/snowflake/snowpark_java/Functions.java
Original file line number Diff line number Diff line change
Expand Up @@ -4504,11 +4504,142 @@ public static Column from_unixtime(Column ut, String f) {
* [Row(SEQ8(0)=0),Row(SEQ8(0)=1), Row(SEQ8(0)=2)]
* }</pre>
*
* @return A sequence of monotonically increasing integers, with wrap-around * which happens after
* largest representable integer of integer width 8 byte.
* @since 1.15.0
*/
public static Column monotonically_increasing_id() {
  // Build the Scala-side expression first, then wrap it in the Java Column facade.
  com.snowflake.snowpark.Column scalaColumn =
      com.snowflake.snowpark.functions.monotonically_increasing_id();
  return new Column(scalaColumn);
}
/**
 * Returns the number of months between the values of two columns, identified by name.
 *
 * <p>Both arguments are column names, not date literals: the Scala implementation resolves each
 * name to a column and applies Snowflake's {@code MONTHS_BETWEEN} to the pair.
 *
 * <p>A whole number is returned if both values have the same day of month or both are the last
 * day of their respective months. Otherwise, the difference is calculated assuming 31 days per
 * month.
 *
 * <p>For example:
 *
 * <pre>{@code
 * DataFrame df =
 *     session.sql("select * from values('2010-07-02'::Date,'2010-08-02'::Date) as t(a,b)");
 * df.select(Functions.months_between("b", "a")).show(); // 1.0
 * }</pre>
 *
 * @param end Name of the column holding the later date or timestamp. String values must be in a
 *     format that can be cast to a timestamp, such as {@code yyyy-MM-dd} or {@code yyyy-MM-dd
 *     HH:mm:ss.SSSS}
 * @param start Name of the column holding the earlier date or timestamp, with the same format
 *     requirements as {@code end}
 * @return A column with the number of months between the two values; negative if {@code end} is
 *     before {@code start}
 * @since 1.15.0
 */
public static Column months_between(String end, String start) {
  // Fully qualified like every sibling wrapper, so this does not depend on a `functions` import.
  return new Column(com.snowflake.snowpark.functions.months_between(end, start));
}

/**
 * Finds the position at which {@code substring} first occurs in the values of {@code str}.
 *
 * <p>Positions are 1-based, not 0-based. The result is 0 if {@code substring} could not be found
 * and null if either argument is null.
 *
 * <p>Example
 *
 * <pre>{@code
 * DataFrame df =
 *     session.sql(
 *         "select * from values('It was the best of times, it was the worst of times') as t(a)");
 * df.select(Functions.instr(df.col("a"), "was")).show(); // 4
 * }</pre>
 *
 * @param str Column whose values are searched
 * @param substring Value to search for; it is forwarded unchanged to the Scala {@code
 *     functions.instr}
 * @return A column holding the 1-based position of the first match
 * @since 1.15.0
 */
public static Column instr(Column str, String substring) {
  com.snowflake.snowpark.Column searched = str.toScalaColumn();
  return new Column(com.snowflake.snowpark.functions.instr(searched, substring));
}

/**
 * Converts {@code ts} to a timestamp with time zone.
 *
 * <p>This method takes no time zone argument. It forwards {@code ts} to the Scala {@code
 * functions.from_utc_timestamp}, which applies Snowflake's {@code TO_TIMESTAMP_TZ} to it. The SQL
 * below shows the equivalent call evaluated under a session time zone:
 *
 * <pre>{@code
 * ALTER SESSION SET TIMEZONE = 'America/Los_Angeles';
 * SELECT TO_TIMESTAMP_TZ('2024-04-05 01:02:03');
 * +----------------------------------------+
 * | TO_TIMESTAMP_TZ('2024-04-05 01:02:03') |
 * |----------------------------------------|
 * | 2024-04-05 01:02:03.000 -0700          |
 * +----------------------------------------+
 * }</pre>
 *
 * <p>NOTE(review): the name suggests a UTC-to-zone conversion, but no target zone is accepted
 * here; confirm the intended semantics.
 *
 * @param ts A date, timestamp or string column. If a string, the data must be in a format that
 *     can be cast to a timestamp, such as {@code yyyy-MM-dd} or {@code yyyy-MM-dd HH:mm:ss.SSSS}
 * @return A column holding the converted timestamp
 * @since 1.15.0
 */
public static Column from_utc_timestamp(Column ts) {
  com.snowflake.snowpark.Column input = ts.toScalaColumn();
  return new Column(com.snowflake.snowpark.functions.from_utc_timestamp(input));
}

/**
 * Converts {@code ts} to a timestamp with time zone.
 *
 * <p>This method takes no time zone argument. It forwards {@code ts} to the Scala {@code
 * functions.to_utc_timestamp}, which applies Snowflake's {@code TO_TIMESTAMP_TZ} to it.
 *
 * <p>NOTE(review): the name suggests a zone-to-UTC conversion, but no source zone is accepted
 * here and the Scala side issues the same builtin as {@code from_utc_timestamp}; confirm the
 * intended semantics.
 *
 * @param ts A date, timestamp or string column. If a string, the data must be in a format that
 *     can be cast to a timestamp, such as {@code yyyy-MM-dd} or {@code yyyy-MM-dd HH:mm:ss.SSSS}
 * @return A column holding the converted timestamp
 * @since 1.15.0
 */
public static Column to_utc_timestamp(Column ts) {
  com.snowflake.snowpark.Column input = ts.toScalaColumn();
  return new Column(com.snowflake.snowpark.functions.to_utc_timestamp(input));
}

/**
 * Renders the numeric column {@code x} as a string with a thousands separator and {@code d}
 * digits after the decimal point.
 *
 * <p>If {@code d} is 0, the result has no decimal point or fractional part. If {@code d} is less
 * than 0, the result is a null column.
 *
 * <p>NOTE(review): the Scala implementation formats through {@code TO_VARCHAR} with a {@code
 * 999,999} format model; behaviour for values needing more than six integer digits, and the
 * exact rounding mode, should be confirmed against Snowflake's format-model documentation.
 *
 * @param x Numeric column to format
 * @param d Number of decimal places; it is unboxed when forwarded, so it must not be null
 * @return A column containing the formatted number as a string
 * @since 1.15.0
 */
public static Column format_number(Column x, Integer d) {
  com.snowflake.snowpark.Column number = x.toScalaColumn();
  return new Column(com.snowflake.snowpark.functions.format_number(number, d));
}

/* Returns a Column expression with values sorted in descending order.
*
Expand Down
110 changes: 110 additions & 0 deletions src/main/scala/com/snowflake/snowpark/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3777,6 +3777,116 @@ object functions {
*/
def monotonically_increasing_id(): Column = {
  // The SEQ8 builtin is invoked without arguments.
  val seq8 = builtin("seq8")
  seq8()
}

/**
 * Returns the number of months between the values of two columns, identified by name.
 *
 * Both arguments are column names: each is resolved with `col` and the pair is handed to the
 * `MONTHS_BETWEEN` builtin.
 *
 * A whole number is returned if both values have the same day of month or both are the last
 * day of their respective months. Otherwise, the difference is calculated assuming 31 days
 * per month.
 *
 * For example:
 * {{{
 * val df = Seq((Date.valueOf("2010-08-02"), Date.valueOf("2010-07-02"))).toDF("a", "b")
 * df.select(months_between("a", "b")).show() // 1.0
 * }}}
 * @since 1.15.0
 * @param end Name of the column holding the later date or timestamp. String values must be
 *            in a format that can be cast to a timestamp, such as yyyy-MM-dd
 *            or yyyy-MM-dd HH:mm:ss.SSSS
 * @param start Name of the column holding the earlier date or timestamp, with the same
 *              format requirements as `end`
 * @return A column with the number of months between the two values; negative if end is
 *         before start
 */
def months_between(end: String, start: String): Column = {
  val endColumn = col(end)
  val startColumn = col(start)
  builtin("MONTHS_BETWEEN")(endColumn, startColumn)
}

/**
 * Locates the position of the first occurrence of `substring` in the values of `str`.
 * Returns null if either of the arguments is null.
 *
 * The search is a literal text search (Snowflake `CHARINDEX`), so characters such as `.`,
 * `(` or `+` in `substring` are matched as-is rather than interpreted as a regular
 * expression.
 *
 * For example:
 * {{{
 * val df = Seq("It was the best of times, it was the worst of times").toDF("a")
 * df.select(instr(col("a"), "was")).show() // 4
 * }}}
 *
 * @since 1.15.0
 * @param str Column whose values are searched
 * @param substring Text to search for
 * @return A column holding the position of the first match
 * @note The position is not zero based, but 1 based index. Returns 0 if substr
 *       could not be found in str.
 */
def instr(str: Column, substring: String): Column =
  // CHARINDEX takes the text to find first and the text to search second.
  builtin("CHARINDEX")(lit(substring), str)

/**
 * Converts `ts` to a timestamp with time zone by applying the `TO_TIMESTAMP_TZ` builtin.
 *
 * This method takes no time zone argument. The SQL below shows the equivalent call evaluated
 * under a session time zone:
 * {{{
 * ALTER SESSION SET TIMEZONE = 'America/Los_Angeles';
 * SELECT TO_TIMESTAMP_TZ('2024-04-05 01:02:03');
 * +----------------------------------------+
 * | TO_TIMESTAMP_TZ('2024-04-05 01:02:03') |
 * |----------------------------------------|
 * | 2024-04-05 01:02:03.000 -0700          |
 * +----------------------------------------+
 * }}}
 *
 * NOTE(review): the name suggests a UTC-to-zone conversion, but no target zone is accepted
 * here; confirm the intended semantics.
 *
 * @since 1.15.0
 * @param ts A date, timestamp or string column. If a string, the data must be in a format
 *           that can be cast to a timestamp, such as `yyyy-MM-dd` or
 *           `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return A column holding the converted timestamp
 */
def from_utc_timestamp(ts: Column): Column = {
  val toTimestampTz = builtin("TO_TIMESTAMP_TZ")
  toTimestampTz(ts)
}

/**
 * Converts `ts` to a timestamp with time zone by applying the `TO_TIMESTAMP_TZ` builtin.
 *
 * This method takes no time zone argument.
 *
 * NOTE(review): the name suggests a zone-to-UTC conversion, but no source zone is accepted
 * here and the builtin is the same one `from_utc_timestamp` uses; confirm the intended
 * semantics.
 *
 * @since 1.15.0
 * @param ts A date, timestamp or string column. If a string, the data must be in a format
 *           that can be cast to a timestamp, such as `yyyy-MM-dd` or
 *           `yyyy-MM-dd HH:mm:ss.SSSS`
 * @return A column holding the converted timestamp
 */
def to_utc_timestamp(ts: Column): Column = {
  val toTimestampTz = builtin("TO_TIMESTAMP_TZ")
  toTimestampTz(ts)
}

/**
 * Renders numeric column x as a string with a thousands separator and d digits after the
 * decimal point, using `TO_VARCHAR` with a `999,999` based format model.
 *
 * If d is 0, the result has no decimal point or fractional part.
 * If d is less than 0, the result is a null literal column.
 *
 * NOTE(review): the format model has six integer digit positions; behaviour for larger
 * values, and the exact rounding mode, should be confirmed against Snowflake's format-model
 * documentation.
 *
 * @since 1.15.0
 * @param x numeric column to be transformed
 * @param d Number of decimal places for the number format
 * @return Number cast to the specific string format
 */
def format_number(x: Column, d: Int): Column =
  d match {
    case negative if negative < 0 => lit(null)
    case 0 => builtin("TO_VARCHAR")(x, "999,999")
    case places => builtin("TO_VARCHAR")(x, "999,999." + ("0" * places))
  }
/* Returns a Column expression with values sorted in descending order.
* Example:
* {{{
Expand Down
51 changes: 51 additions & 0 deletions src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
Original file line number Diff line number Diff line change
Expand Up @@ -3135,4 +3135,55 @@ public void unhex() {
Row[] expected = {Row.create("1"), Row.create("2"), Row.create("3")};
checkAnswer(df.select(Functions.unhex(Functions.col("a"))), expected, false);
}

@Test
public void months_between() {
  // Two (a, b) date pairs that are exactly one and four months apart.
  String query =
      "select * from values('2010-07-02'::Date,'2010-08-02'::Date), "
          + "('2020-08-02'::Date,'2020-12-02'::Date) as t(a,b)";
  DataFrame dates = getSession().sql(query);
  DataFrame actual = dates.select(Functions.months_between("b", "a"));
  Row[] expected = {Row.create(1.000000), Row.create(4.000000)};
  checkAnswer(actual, expected, false);
}

@Test
public void instr() {
  // "was" first appears at 1-based position 4 of the sentence.
  String query =
      "select * from values('It was the best of times, it was the worst of times') as t(a)";
  DataFrame sentence = getSession().sql(query);
  DataFrame actual = sentence.select(Functions.instr(sentence.col("a"), "was"));
  Row[] expected = {Row.create(4)};
  checkAnswer(actual, expected, false);
}

@Test
public void format_number1() {
  // Zero decimals: no decimal point expected; ltrim drops the formatter's leading padding.
  DataFrame numbers = getSession().sql("select * from values(1),(2),(3) as t(a)");
  DataFrame actual =
      numbers.select(Functions.ltrim(Functions.format_number(numbers.col("a"), 0)));
  Row[] expected = {Row.create("1"), Row.create("2"), Row.create("3")};
  checkAnswer(actual, expected, false);
}

@Test
public void format_number2() {
  // Two decimals: every value is rendered with a ".00" fraction.
  DataFrame numbers = getSession().sql("select * from values(1),(2),(3) as t(a)");
  DataFrame actual =
      numbers.select(Functions.ltrim(Functions.format_number(numbers.col("a"), 2)));
  Row[] expected = {Row.create("1.00"), Row.create("2.00"), Row.create("3.00")};
  checkAnswer(actual, expected, false);
}

@Test
public void from_utc_timestamp() {
  // A timestamp string is expected to come back as the same wall-clock timestamp.
  DataFrame input = getSession().sql("select * from values('2024-04-05 01:02:03') as t(a)");
  DataFrame actual = input.select(Functions.from_utc_timestamp(input.col("a")));
  Row[] expected = {Row.create(Timestamp.valueOf("2024-04-05 01:02:03.0"))};
  checkAnswer(actual, expected, false);
}

@Test
public void to_utc_timestamp() {
  // A timestamp string is expected to come back as the same wall-clock timestamp.
  DataFrame input = getSession().sql("select * from values('2024-04-05 01:02:03') as t(a)");
  DataFrame actual = input.select(Functions.to_utc_timestamp(input.col("a")));
  Row[] expected = {Row.create(Timestamp.valueOf("2024-04-05 01:02:03.0"))};
  checkAnswer(actual, expected, false);
}
}
52 changes: 52 additions & 0 deletions src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2466,6 +2466,58 @@ trait FunctionSuite extends TestData {
Seq(Row("1"), Row("2"), Row("3")),
sort = false)
}
test("months_between") {
  // Exercise the public functions.months_between API, which takes column names, instead of
  // a locally built MONTHS_BETWEEN builtin, so a regression in the wrapper is caught here.
  val input = Seq(
    (Date.valueOf("2010-08-02"), Date.valueOf("2010-07-02")),
    (Date.valueOf("2020-12-02"), Date.valueOf("2020-08-02")))
    .toDF("a", "b")
  checkAnswer(
    input.select(functions.months_between("a", "b")),
    Seq(Row(1.000000), Row(4.000000)),
    sort = false)
}

test("instr") {
  // "was" first appears at 1-based position 4 of the sentence.
  val sentence = "It was the best of times, it was the worst of times"
  val df = Seq(sentence).toDF("a")
  val position = df.select(instr(col("a"), "was"))
  checkAnswer(position, Seq(Row(4)), sort = false)
}

test("format_number1") {
  // Zero decimals: no decimal point expected; ltrim drops the formatter's leading padding.
  val formatted = number3.select(ltrim(format_number(col("a"), 0)))
  val expected = Seq(Row("1"), Row("2"), Row("3"))
  checkAnswer(formatted, expected, sort = false)
}

test("format_number2") {
  // Two decimals: every value is rendered with a ".00" fraction.
  val formatted = number3.select(ltrim(format_number(col("a"), 2)))
  val expected = Seq(Row("1.00"), Row("2.00"), Row("3.00"))
  checkAnswer(formatted, expected, sort = false)
}

test("format_number3") {
  // A negative number of decimals yields null for every row.
  val formatted = number3.select(ltrim(format_number(col("a"), -1)))
  val expected = Seq(Row(null), Row(null), Row(null))
  checkAnswer(formatted, expected, sort = false)
}

test("from_utc_timestamp") {
  // A timestamp string is expected to come back as the same wall-clock timestamp.
  val expectedFrame = Seq(Timestamp.valueOf("2024-04-05 01:02:03.0")).toDF("a")
  val input = Seq("2024-04-05 01:02:03").toDF("a")
  val converted = input.select(from_utc_timestamp(col("a")))
  checkAnswer(converted, expectedFrame, sort = false)
}

test("to_utc_timestamp") {
  // A timestamp string is expected to come back as the same wall-clock timestamp.
  val expectedFrame = Seq(Timestamp.valueOf("2024-04-05 01:02:03.0")).toDF("a")
  val input = Seq("2024-04-05 01:02:03").toDF("a")
  val converted = input.select(to_utc_timestamp(col("a")))
  checkAnswer(converted, expectedFrame, sort = false)
}

}

Expand Down
Loading