From fcd50b68c3335af51a9f9c0a57bcadc5d74b9089 Mon Sep 17 00:00:00 2001 From: "shyamala.jayabalan" Date: Mon, 19 Aug 2024 17:03:16 -0400 Subject: [PATCH] Modified comment section and changed regexp in substring_index --- .../snowflake/snowpark_java/Functions.java | 45 +++++++++++-------- .../com/snowflake/snowpark/functions.scala | 14 ++++-- .../snowpark_test/JavaFunctionSuite.java | 4 +- .../snowpark_test/FunctionSuite.scala | 6 ++- 4 files changed, 42 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/snowflake/snowpark_java/Functions.java b/src/main/java/com/snowflake/snowpark_java/Functions.java index d94dc81a..ead78cb4 100644 --- a/src/main/java/com/snowflake/snowpark_java/Functions.java +++ b/src/main/java/com/snowflake/snowpark_java/Functions.java @@ -3882,13 +3882,12 @@ public static Column listagg(Column col) { } /** - * * Signature - snowflake.snowpark.functions.regexp_extract (value: Union[Column, str], regexp: * Union[Column, str], idx: int) Column Extract a specific group matched by a regex, from the * specified string column. If the regex did not match, or the specified group did not match, an - * empty string is returned. - * Example: - *
{@code
+   * empty string is returned. Example:
+   *
+   * 
{@code
    * from snowflake.snowpark.functions import regexp_extract
    * df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]], ["id", "age"])
    * df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show()
@@ -3898,9 +3897,14 @@ public static Column listagg(Column col) {
    *     |20     |
    *     |40     |
    *     ---------
-   * }
+   * }
* * @since 1.14.0 + * @param col Column. + * @param exp String + * @param position Integer. + * @param Occurences Integer. + * @param grpIdx Integer. * @return Column object. */ public static Column regexp_extract( @@ -3915,9 +3919,9 @@ public static Column regexp_extract( * *

- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0. * - *

Args: col: The column to evaluate its sign - * Example:: - * *

{@code df =
+   * 

Args: col: The column to evaluate its sign Example:: * + * + *

{@code df =
    * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
    * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
    * sign("c").alias("c_sign")).show()
@@ -3926,10 +3930,10 @@ public static Column regexp_extract(
    *     ----------------------------------
    *     |-1        |1         |0         |
    *     ----------------------------------
-   * }
+   * }
* * @since 1.14.0 - * @param e Column to calculate the sign. + * @param col Column to calculate the sign. * @return Column object. */ public static Column signum(Column col) { @@ -3941,8 +3945,8 @@ public static Column signum(Column col) { * *

- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0. * - *

Args: col: The column to evaluate its sign - * Example:: + *

Args: col: The column to evaluate its sign Example:: + * *

{@code df =
    * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
    * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
@@ -3952,10 +3956,10 @@ public static Column signum(Column col) {
    *     ----------------------------------
    *     |-1        |1         |0         |
    *     ----------------------------------
-   * }
+   * }
* * @since 1.14.0 - * @param e Column to calculate the sign. + * @param col Column to calculate the sign. * @return Column object. */ public static Column sign(Column col) { @@ -3968,11 +3972,14 @@ public static Column sign(Column col) { * count is negative, every to the right of the final delimiter (counting from the right) is * returned. substring_index performs a case-sensitive match when searching for delim. * + * @param col String. + * @param delim String + * @param count Integer. + * @return Column object. * @since 1.14.0 */ - public static Column substring_index(Column col, String delim, Integer count) { - return new Column( - com.snowflake.snowpark.functions.substring_index(col.toScalaColumn(), delim, count)); + public static Column substring_index(String col, String delim, Integer count) { + return new Column(com.snowflake.snowpark.functions.substring_index(col, delim, count)); } /** @@ -3991,8 +3998,8 @@ public static Column substring_index(Column col, String delim, Integer count) { * @param c Column to be collect. * @return The array. */ - public static Column collect_list(Column col) { - return new Column(com.snowflake.snowpark.functions.collect_list(col.toScalaColumn())); + public static Column collect_list(Column c) { + return new Column(com.snowflake.snowpark.functions.collect_list(c.toScalaColumn())); } /* Returns a Column expression with values sorted in descending order. diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala index 241b8583..0abd3008 100644 --- a/src/main/scala/com/snowflake/snowpark/functions.scala +++ b/src/main/scala/com/snowflake/snowpark/functions.scala @@ -3176,7 +3176,7 @@ object functions { when(colName.is_null, lit(null)) .otherwise( coalesce( - builtin("REGEX_SUBSTR")( + builtin("REGEXP_SUBSTR")( colName, lit(exp), lit(position), @@ -3266,19 +3266,25 @@ object functions { * substring_index performs a case-sensitive match when searching for delim. * @since 1.14.0 */ - def substring_index(str: Column, delim: String, count: Int): Column = { + def substring_index(str: String, delim: String, count: Int): Column = { when( lit(count) < lit(0), callBuiltin( "substring", lit(str), - callBuiltin("regexp_instr", sqlExpr(s"reverse(${str}, ${delim}, 1, abs(${count}), 0")))) + callBuiltin( + "regexp_instr", + sqlExpr(s"reverse(${str})"), + lit(delim), + 1, + abs(lit(count)), + lit(0)))) .otherwise( callBuiltin( "substring", lit(str), 1, - callBuiltin("regexp_instr", col("str"), lit(delim), 1, lit(count), 1))) + callBuiltin("regexp_instr", lit(str), lit(delim), 1, lit(count), 1))) } /** diff --git a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java index e2ebf707..5f8346a4 100644 --- a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java +++ b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java @@ -2810,9 +2810,7 @@ public void substring_index() { .sql( "select * from values ('It was the best of times,it was the worst of times') as T(a)"); checkAnswer( - df.select(Functions.substring_index(df.col("a"), "was", 1)), - new Row[] {Row.create(7)}, - false); + df.select(Functions.substring_index("a", "was", 1)), new Row[] {Row.create(7)}, false); } public void test_asc() { diff --git a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala index 873e8103..f4e0a21a 100644 --- a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala +++ b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala @@ -1090,6 +1090,7 @@ trait FunctionSuite extends TestData { .collect()(0) .getTimestamp(0) .toString == "2020-10-28 13:35:47.001234567") + } test("timestamp_ltz_from_parts") { @@ -2218,7 +2219,10 @@ trait FunctionSuite extends TestData { } test("substring_index") { val df = Seq("It was the best of times, it was the worst of times").toDF("a") - checkAnswer(df.select(substring_index(col("a"), "was", 1)), Seq(Row(7)), sort = false) + checkAnswer( + df.select(substring_index("It was the best of times, it was the worst of times", "was", 1)), + Seq(Row(7)), + sort = false) } test("desc column order") {