From d14ff9fce2817e400db7735252515c3855556ff5 Mon Sep 17 00:00:00 2001
From: Shyamala Jayabalan <shyamala.jayabalan@snowflake.com>
Date: Tue, 13 Aug 2024 10:22:38 -0400
Subject: [PATCH 1/8] Add regexp_extract, signum, sign, substring_index and
 collect_list functions (#141)

* Added regexp_extract, signum, substring_index, collect_list

1) Added regexp_extract, signum, substring_index and collect_list to functions.scala.
2) Added test cases for these functions.

* Added examples and updated the description

* Fixed format

* formatted the comments

* Added java functions and unit test cases for java

* Added sign function

* Modified the alignment

* Added examples

* adjusted comments

* Update Functions.java

---------

Co-authored-by: sfc-gh-mrojas <mauricio.rojas@snowflake.com>
---
 .../snowflake/snowpark_java/Functions.java    | 114 +++++++++++
 .../com/snowflake/snowpark/functions.scala    | 187 ++++++++++++++++++
 .../snowpark_test/JavaFunctionSuite.java      |  49 +++++
 .../snowpark_test/FunctionSuite.scala         |  45 +++++
 4 files changed, 395 insertions(+)
diff --git a/src/main/java/com/snowflake/snowpark_java/Functions.java b/src/main/java/com/snowflake/snowpark_java/Functions.java
index 65f50020..41c38135 100644
--- a/src/main/java/com/snowflake/snowpark_java/Functions.java
+++ b/src/main/java/com/snowflake/snowpark_java/Functions.java
@@ -3882,6 +3882,119 @@ public static Column listagg(Column col) {
   }
 
   /**
+
+   * Signature - snowflake.snowpark.functions.regexp_extract (value: Union[Column, str], regexp:
+   * Union[Column, str], idx: int) Column Extract a specific group matched by a regex, from the
+   * specified string column. If the regex did not match, or the specified group did not match, an
+   * empty string is returned.
+   * Example:
+   *  <pre>{@code
+   * from snowflake.snowpark.functions import regexp_extract
+   * df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]], ["id", "age"])
+   * df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show()
+   *    ---------
+   *     |"RES"  |
+   *     ---------
+   *     |20     |
+   *     |40     |
+   *     ---------
+   * }</pr>
+   *
+   * @since 1.12.1
+   * @return Column object.
+   */
+  public static Column regexp_extract(
+      Column col, String exp, Integer position, Integer Occurences, Integer grpIdx) {
+    return new Column(
+        com.snowflake.snowpark.functions.regexp_extract(
+            col.toScalaColumn(), exp, position, Occurences, grpIdx));
+  }
+
+  /**
+   * Returns the sign of its argument:
+   *
+   * <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0.
+   *
+   * <p>Args: col: The column to evaluate its sign
+   *  Example::
+   * * <pre>{@code df =
+   * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
+   * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
+   * sign("c").alias("c_sign")).show()
+   *   ----------------------------------
+   *     |"A_SIGN"  |"B_SIGN"  |"C_SIGN"  |
+   *     ----------------------------------
+   *     |-1        |1         |0         |
+   *     ----------------------------------
+   * }</pr>
+   *
+   * @since 1.12.1
+   * @param e Column to calculate the sign.
+   * @return Column object.
+   */
+  public static Column signum(Column col) {
+    return new Column(com.snowflake.snowpark.functions.signum(col.toScalaColumn()));
+  }
+
+  /**
+   * Returns the sign of its argument:
+   *
+   * <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0.
+   *
+   * <p>Args: col: The column to evaluate its sign
+   * Example::
+   * <pre>{@code df =
+   * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
+   * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
+   * sign("c").alias("c_sign")).show()
+   *   ----------------------------------
+   *     |"A_SIGN"  |"B_SIGN"  |"C_SIGN"  |
+   *     ----------------------------------
+   *     |-1        |1         |0         |
+   *     ----------------------------------
+   * }</pr>
+   *
+   * @since 1.12.1
+   * @param e Column to calculate the sign.
+   * @return Column object.
+   */
+  public static Column sign(Column col) {
+    return new Column(com.snowflake.snowpark.functions.sign(col.toScalaColumn()));
+  }
+
+  /**
+   * Returns the substring from string str before count occurrences of the delimiter delim. If count
+   * is positive, everything the left of the final delimiter (counting from left) is returned. If
+   * count is negative, every to the right of the final delimiter (counting from the right) is
+   * returned. substring_index performs a case-sensitive match when searching for delim.
+   *
+   * @since 1.12.1
+   */
+  public static Column substring_index(Column col, String delim, Integer count) {
+    return new Column(
+        com.snowflake.snowpark.functions.substring_index(col.toScalaColumn(), delim, count));
+  }
+
+  /**
+   * Returns the input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is
+   * returned.
+   *
+   * <p>Example::
+   *
+   * <pre>{@code
+   * df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"])
+   * df.select(array_agg("a", True).alias("result")).show()
+   * "RESULT" [ 1, 2, 3 ]
+   * }</pre>
+   *
+   * @since 1.10.0
+   * @param c Column to be collect.
+   * @return The array.
+   */
+  public static Column collect_list(Column col) {
+    return new Column(com.snowflake.snowpark.functions.collect_list(col.toScalaColumn()));
+  }
+
    * Returns a Column expression with values sorted in descending order.
    *
    * <p>Example: order column values in descending
@@ -4053,6 +4166,7 @@ public static Column last(Column col) {
     return new Column(functions.last(col.toScalaColumn()));
   }
 
+
   /**
    * Calls a user-defined function (UDF) by name.
    *
diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index 5c6f599f..a28be119 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -3142,6 +3142,192 @@ object functions {
   def listagg(col: Column): Column = listagg(col, "", isDistinct = false)
 
   /**
+
+   * Signature - snowflake.snowpark.functions.regexp_extract
+   * (value: Union[Column, str], regexp: Union[Column, str], idx: int)
+   *   Column
+   * Extract a specific group matched by a regex, from the specified string
+   * column. If the regex did not match, or the specified group did not match,
+   * an empty string is returned.
+   * <pr>Example:
+   * from snowflake.snowpark.functions import regexp_extract
+   * df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]],
+   *  ["id", "age"])
+   * df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show()
+   *</pr>
+   *<pr>
+   *     ---------
+   *     |"RES"  |
+   *     ---------
+   *     |20     |
+   *     |40     |
+   *     ---------
+   *</pr>
+   * Note: non-greedy tokens such as  are not supported
+   * @since 1.12.1
+   * @return Column object.
+   */
+  def regexp_extract(
+      colName: Column,
+      exp: String,
+      position: Int,
+      Occurences: Int,
+      grpIdx: Int): Column = {
+    when(colName.is_null, lit(null))
+      .otherwise(
+        coalesce(
+          builtin("REGEX_SUBSTR")(
+            colName,
+            lit(exp),
+            lit(position),
+            lit(Occurences),
+            lit("ce"),
+            lit(grpIdx)),
+          lit("")))
+  }
+
+  /**
+   *    Returns the sign of its argument:
+   *
+   *     - -1 if the argument is negative.
+   *     - 1 if it is positive.
+   *     - 0 if it is 0.
+   *
+   * Args:
+   *     col: The column to evaluate its sign
+   *<pr>
+   * Example::
+   *     >>> df = session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"])
+   *     >>> df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
+   * sign("c").alias("c_sign")).show()
+   *     ----------------------------------
+   *     |"A_SIGN"  |"B_SIGN"  |"C_SIGN"  |
+   *     ----------------------------------
+   *     |-1        |1         |0         |
+   *     ----------------------------------
+   * </pr>
+   * @since 1.12.1
+   * @param e Column to calculate the sign.
+   * @return Column object.
+   */
+  def sign(colName: Column): Column = {
+    builtin("SIGN")(colName)
+  }
+
+  /**
+   *    Returns the sign of its argument:
+   *
+   *     - -1 if the argument is negative.
+   *     - 1 if it is positive.
+   *     - 0 if it is 0.
+   *
+   * Args:
+   *     col: The column to evaluate its sign
+   *<pr>
+   * Example::
+   *     >>> df = session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"])
+   *     >>> df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
+   * sign("c").alias("c_sign")).show()
+   *     ----------------------------------
+   *     |"A_SIGN"  |"B_SIGN"  |"C_SIGN"  |
+   *     ----------------------------------
+   *     |-1        |1         |0         |
+   *     ----------------------------------
+   * </pr>
+   * @since 1.12.1
+   * @param e Column to calculate the sign.
+   * @return Column object.
+   */
+  def signum(colName: Column): Column = {
+    builtin("SIGN")(colName)
+  }
+
+  /**
+   * Returns the sign of the given column. Returns either 1 for positive,
+   *  0 for 0 or
+   * NaN, -1 for negative and null for null.
+   * NOTE: if string values are provided snowflake will attempts to cast.
+   *  If it casts correctly, returns the calculation,
+   *  if not an error will be thrown
+   * @since 1.12.1
+   * @param columnName Name of the column to calculate the sign.
+   * @return Column object.
+   */
+  def signum(columnName: String): Column = {
+    signum(col(columnName))
+  }
+
+  /**
+   * Returns the substring from string str before count occurrences
+   * of the delimiter delim. If count is positive,
+   * everything the left of the final delimiter (counting from left)
+   *  is returned. If count is negative, every to the right of the
+   * final delimiter (counting from the right) is returned.
+   * substring_index performs a case-sensitive match when searching for delim.
+   *   @since 1.12.1
+   */
+  def substring_index(str: Column, delim: String, count: Int): Column = {
+    when(
+      lit(count) < lit(0),
+      callBuiltin(
+        "substring",
+        lit(str),
+        callBuiltin("regexp_instr", sqlExpr(s"reverse(${str}, ${delim}, 1, abs(${count}), 0"))))
+      .otherwise(
+        callBuiltin(
+          "substring",
+          lit(str),
+          1,
+          callBuiltin("regexp_instr", col("str"), lit(delim), 1, lit(count), 1)))
+  }
+
+  /**
+   *
+   * Returns the input values, pivoted into an ARRAY. If the input is empty, an empty
+   * ARRAY is returned.
+   *<pr>
+   * Example::
+   *     >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"])
+   *     >>> df.select(array_agg("a", True).alias("result")).show()
+   *     ------------
+   *     |"RESULT"  |
+   *     ------------
+   *     |[         |
+   *     |  1,      |
+   *     |  2,      |
+   *     |  3       |
+   *     |]         |
+   *     ------------
+   * </pr>
+   * @since 1.10.0
+   * @param c Column to be collect.
+   * @return The array.
+   */
+  def collect_list(c: Column): Column = array_agg(c)
+
+  /**
+   *
+   * Returns the input values, pivoted into an ARRAY. If the input is empty, an empty
+   * ARRAY is returned.
+   *
+   * Example::
+   *     >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"])
+   *     >>> df.select(array_agg("a", True).alias("result")).show()
+   *     ------------
+   *     |"RESULT"  |
+   *     ------------
+   *     |[         |
+   *     |  1,      |
+   *     |  2,      |
+   *     |  3       |
+   *     |]         |
+   *     ------------
+   * @since 1.10.0
+   * @param s Column name to be collected.
+   * @return The array.
+   */
+  def collect_list(s: String): Column = array_agg(col(s))
+
    * Returns a Column expression with values sorted in descending order.
    * Example:
    * {{{
@@ -3312,6 +3498,7 @@ object functions {
   def last(c: Column): Column =
     builtin("LAST_VALUE")(c)
 
+
   /**
    * Invokes a built-in snowflake function with the specified name and arguments.
    * Arguments can be of two types
diff --git a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
index 624ea481..954cb278 100644
--- a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
+++ b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
@@ -2766,6 +2766,54 @@ public void any_value() {
   }
 
   @Test
+
+  public void regexp_extract() {
+    DataFrame df = getSession().sql("select * from values('A MAN A PLAN A CANAL') as T(a)");
+    Row[] expected = {Row.create("MAN")};
+    checkAnswer(
+        df.select(Functions.regexp_extract(df.col("a"), "A\\W+(\\w+)", 1, 1, 1)), expected, false);
+    Row[] expected2 = {Row.create("PLAN")};
+    checkAnswer(
+        df.select(Functions.regexp_extract(df.col("a"), "A\\W+(\\w+)", 1, 2, 1)), expected2, false);
+    Row[] expected3 = {Row.create("CANAL")};
+    checkAnswer(
+        df.select(Functions.regexp_extract(df.col("a"), "A\\W+(\\w+)", 1, 2, 1)), expected3, false);
+    Row[] expected4 = {Row.create(null)};
+    checkAnswer(
+        df.select(Functions.regexp_extract(df.col("a"), "A\\W+(\\w+)", 1, 3, 1)), expected4, false);
+  }
+
+  @Test
+  public void signum() {
+    DataFrame df = getSession().sql("select * from values(1,-2,0) as T(a)");
+    checkAnswer(df.select(Functions.signum(df.col("a"))), new Row[] {Row.create(1, -1, 0)}, false);
+  }
+
+  @Test
+  public void sign() {
+    DataFrame df = getSession().sql("select * from values(1,-2,0) as T(a)");
+    checkAnswer(df.select(Functions.sign(df.col("a"))), new Row[] {Row.create(1, -1, 0)}, false);
+  }
+
+  @Test
+  public void collect_list() {
+    DataFrame df = getSession().sql("select * from values(10000,400,450) as T(a)");
+    checkAnswer(
+        df.select(Functions.collect_list(df.col("a"))),
+        new Row[] {Row.create("[\n  \"10000,400,450\"\n]")},
+        false);
+  }
+
+  @Test
+  public void substring_index() {
+    DataFrame df =
+        getSession()
+            .sql(
+                "select * from values ('It was the best of times,it was the worst of times') as T(a)");
+    checkAnswer(
+        df.select(Functions.substring_index(df.col("a"), "was", 1)),
+        new Row[] {Row.create(7)},
+
   public void test_asc() {
     DataFrame df = getSession().sql("select * from values(3),(1),(2) as t(a)");
     Row[] expected = {Row.create(1), Row.create(2), Row.create(3)};
@@ -2826,6 +2874,7 @@ public void last() {
             Functions.last(df.col("name"))
                 .over(Window.partitionBy(df.col("grade")).orderBy(df.col("score").desc()))),
         expected,
+
         false);
   }
 }
diff --git a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
index 8a89d87b..22f474ea 100644
--- a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
@@ -2177,7 +2177,51 @@ trait FunctionSuite extends TestData {
       expected,
       sort = false)
   }
+  test("regexp_extract") {
+    val data = Seq("A MAN A PLAN A CANAL").toDF("a")
+    var expected = Seq(Row("MAN"))
+    checkAnswer(
+      data.select(regexp_extract(col("a"), "A\\W+(\\w+)", 1, 1, 1)),
+      expected,
+      sort = false)
+    expected = Seq(Row("PLAN"))
+    checkAnswer(
+      data.select(regexp_extract(col("a"), "A\\W+(\\w+)", 1, 2, 1)),
+      expected,
+      sort = false)
+    expected = Seq(Row("CANAL"))
+    checkAnswer(
+      data.select(regexp_extract(col("a"), "A\\W+(\\w+)", 1, 3, 1)),
+      expected,
+      sort = false)
 
+
+    expected = Seq(Row(null))
+    checkAnswer(
+      data.select(regexp_extract(col("a"), "A\\W+(\\w+)", 1, 4, 1)),
+      expected,
+      sort = false)
+  }
+  test("signum") {
+    val df = Seq(1, -2, 0).toDF("a")
+    checkAnswer(df.select(signum(col("a"))), Seq(Row(1), Row(-1), Row(0)), sort = false)
+  }
+  test("sign") {
+    val df = Seq(1, -2, 0).toDF("a")
+    checkAnswer(df.select(sign(col("a"))), Seq(Row(1), Row(-1), Row(0)), sort = false)
+  }
+
+  test("collect_list") {
+    assert(monthlySales.select(collect_list(col("amount"))).collect()(0).get(0).toString ==
+      "[\n  10000,\n  400,\n  4500,\n  35000,\n  5000,\n  3000,\n  200,\n  90500,\n  6000,\n  " +
+        "5000,\n  2500,\n  9500,\n  8000,\n  10000,\n  800,\n  4500\n]")
+
+  }
+  test("substring_index") {
+    val df = Seq("It was the best of times, it was the worst of times").toDF("a")
+    checkAnswer(df.select(substring_index(col("a"), "was", 1)), Seq(Row(7)), sort = false)
+  }
+  
   test("desc column order") {
     val input = Seq(1, 2, 3).toDF("data")
     val expected = Seq(3, 2, 1).toDF("data")
@@ -2245,6 +2289,7 @@ trait FunctionSuite extends TestData {
       sort = false)
   }
 
+
 }
 
 class EagerFunctionSuite extends FunctionSuite with EagerSession

From cb7e041bb628366222a3bf99d7ac590f5fbb361e Mon Sep 17 00:00:00 2001
From: Shyamala Jayabalan <shyamala.jayabalan@snowflake.com>
Date: Thu, 15 Aug 2024 10:05:50 -0400
Subject: [PATCH 2/8] Reformatted

---
 src/main/java/com/snowflake/snowpark_java/Functions.java     | 5 ++---
 src/main/scala/com/snowflake/snowpark/functions.scala        | 3 +--
 .../java/com/snowflake/snowpark_test/JavaFunctionSuite.java  | 4 ++--
 .../scala/com/snowflake/snowpark_test/FunctionSuite.scala    | 4 +---
 4 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/main/java/com/snowflake/snowpark_java/Functions.java b/src/main/java/com/snowflake/snowpark_java/Functions.java
index 41c38135..4b3bdb1d 100644
--- a/src/main/java/com/snowflake/snowpark_java/Functions.java
+++ b/src/main/java/com/snowflake/snowpark_java/Functions.java
@@ -3882,7 +3882,7 @@ public static Column listagg(Column col) {
   }
 
   /**
-
+   *
    * Signature - snowflake.snowpark.functions.regexp_extract (value: Union[Column, str], regexp:
    * Union[Column, str], idx: int) Column Extract a specific group matched by a regex, from the
    * specified string column. If the regex did not match, or the specified group did not match, an
@@ -3995,7 +3995,7 @@ public static Column collect_list(Column col) {
     return new Column(com.snowflake.snowpark.functions.collect_list(col.toScalaColumn()));
   }
 
-   * Returns a Column expression with values sorted in descending order.
+  /* Returns a Column expression with values sorted in descending order.
    *
    * <p>Example: order column values in descending
    *
@@ -4166,7 +4166,6 @@ public static Column last(Column col) {
     return new Column(functions.last(col.toScalaColumn()));
   }
 
-
   /**
    * Calls a user-defined function (UDF) by name.
    *
diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index a28be119..588d8290 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -3328,7 +3328,7 @@ object functions {
    */
   def collect_list(s: String): Column = array_agg(col(s))
 
-   * Returns a Column expression with values sorted in descending order.
+  /* Returns a Column expression with values sorted in descending order.
    * Example:
    * {{{
    *   val df = session.createDataFrame(Seq(1, 2, 3)).toDF("id")
@@ -3498,7 +3498,6 @@ object functions {
   def last(c: Column): Column =
     builtin("LAST_VALUE")(c)
 
-
   /**
    * Invokes a built-in snowflake function with the specified name and arguments.
    * Arguments can be of two types
diff --git a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
index 954cb278..f70ccdea 100644
--- a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
+++ b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
@@ -2766,7 +2766,6 @@ public void any_value() {
   }
 
   @Test
-
   public void regexp_extract() {
     DataFrame df = getSession().sql("select * from values('A MAN A PLAN A CANAL') as T(a)");
     Row[] expected = {Row.create("MAN")};
@@ -2813,6 +2812,8 @@ public void substring_index() {
     checkAnswer(
         df.select(Functions.substring_index(df.col("a"), "was", 1)),
         new Row[] {Row.create(7)},
+        false);
+  }
 
   public void test_asc() {
     DataFrame df = getSession().sql("select * from values(3),(1),(2) as t(a)");
@@ -2874,7 +2875,6 @@ public void last() {
             Functions.last(df.col("name"))
                 .over(Window.partitionBy(df.col("grade")).orderBy(df.col("score").desc()))),
         expected,
-
         false);
   }
 }
diff --git a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
index 22f474ea..1420bb10 100644
--- a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
@@ -2195,7 +2195,6 @@ trait FunctionSuite extends TestData {
       expected,
       sort = false)
 
-
     expected = Seq(Row(null))
     checkAnswer(
       data.select(regexp_extract(col("a"), "A\\W+(\\w+)", 1, 4, 1)),
@@ -2221,7 +2220,7 @@ trait FunctionSuite extends TestData {
     val df = Seq("It was the best of times, it was the worst of times").toDF("a")
     checkAnswer(df.select(substring_index(col("a"), "was", 1)), Seq(Row(7)), sort = false)
   }
-  
+
   test("desc column order") {
     val input = Seq(1, 2, 3).toDF("data")
     val expected = Seq(3, 2, 1).toDF("data")
@@ -2289,7 +2288,6 @@ trait FunctionSuite extends TestData {
       sort = false)
   }
 
-
 }
 
 class EagerFunctionSuite extends FunctionSuite with EagerSession

From 23643c38afe26e60aad0569c854a0b4fbea7c335 Mon Sep 17 00:00:00 2001
From: Shyamala Jayabalan <shyamala.jayabalan@snowflake.com>
Date: Thu, 15 Aug 2024 19:09:45 -0400
Subject: [PATCH 3/8] Bump @since tags of the new functions to 1.14.0

---
 .../com/snowflake/snowpark_java/Functions.java     | 10 +++++-----
 .../scala/com/snowflake/snowpark/functions.scala   | 14 +++++++-------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/main/java/com/snowflake/snowpark_java/Functions.java b/src/main/java/com/snowflake/snowpark_java/Functions.java
index 4b3bdb1d..35864a11 100644
--- a/src/main/java/com/snowflake/snowpark_java/Functions.java
+++ b/src/main/java/com/snowflake/snowpark_java/Functions.java
@@ -3900,7 +3900,7 @@ public static Column listagg(Column col) {
    *     ---------
    * }</pr>
    *
-   * @since 1.12.1
+   * @since 1.14.0
    * @return Column object.
    */
   public static Column regexp_extract(
@@ -3928,7 +3928,7 @@ public static Column regexp_extract(
    *     ----------------------------------
    * }</pr>
    *
-   * @since 1.12.1
+   * @since 1.14.0
    * @param e Column to calculate the sign.
    * @return Column object.
    */
@@ -3954,7 +3954,7 @@ public static Column signum(Column col) {
    *     ----------------------------------
    * }</pr>
    *
-   * @since 1.12.1
+   * @since 1.14.0
    * @param e Column to calculate the sign.
    * @return Column object.
    */
@@ -3968,7 +3968,7 @@ public static Column sign(Column col) {
    * count is negative, every to the right of the final delimiter (counting from the right) is
    * returned. substring_index performs a case-sensitive match when searching for delim.
    *
-   * @since 1.12.1
+   * @since 1.14.0
    */
   public static Column substring_index(Column col, String delim, Integer count) {
     return new Column(
@@ -3987,7 +3987,7 @@ public static Column substring_index(Column col, String delim, Integer count) {
    * "RESULT" [ 1, 2, 3 ]
    * }</pre>
    *
-   * @since 1.10.0
+   * @since 1.14.0
    * @param c Column to be collect.
    * @return The array.
    */
diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index 588d8290..d76f52a0 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -3164,7 +3164,7 @@ object functions {
    *     ---------
    *</pr>
    * Note: non-greedy tokens such as  are not supported
-   * @since 1.12.1
+   * @since 1.14.0
    * @return Column object.
    */
   def regexp_extract(
@@ -3206,7 +3206,7 @@ object functions {
    *     |-1        |1         |0         |
    *     ----------------------------------
    * </pr>
-   * @since 1.12.1
+   * @since 1.14.0
    * @param e Column to calculate the sign.
    * @return Column object.
    */
@@ -3234,7 +3234,7 @@ object functions {
    *     |-1        |1         |0         |
    *     ----------------------------------
    * </pr>
-   * @since 1.12.1
+   * @since 1.14.0
    * @param e Column to calculate the sign.
    * @return Column object.
    */
@@ -3249,7 +3249,7 @@ object functions {
    * NOTE: if string values are provided snowflake will attempts to cast.
    *  If it casts correctly, returns the calculation,
    *  if not an error will be thrown
-   * @since 1.12.1
+   * @since 1.14.0
    * @param columnName Name of the column to calculate the sign.
    * @return Column object.
    */
@@ -3264,7 +3264,7 @@ object functions {
    *  is returned. If count is negative, every to the right of the
    * final delimiter (counting from the right) is returned.
    * substring_index performs a case-sensitive match when searching for delim.
-   *   @since 1.12.1
+   *   @since 1.14.0
    */
   def substring_index(str: Column, delim: String, count: Int): Column = {
     when(
@@ -3299,7 +3299,7 @@ object functions {
    *     |]         |
    *     ------------
    * </pr>
-   * @since 1.10.0
+   * @since 1.14.0
    * @param c Column to be collect.
    * @return The array.
    */
@@ -3322,7 +3322,7 @@ object functions {
    *     |  3       |
    *     |]         |
    *     ------------
-   * @since 1.10.0
+   * @since 1.14.0
    * @param s Column name to be collected.
    * @return The array.
    */

From 5d4d8cef21a64d75a9bf47eefd2ea7522021e9b8 Mon Sep 17 00:00:00 2001
From: Shyamala Jayabalan <shyamala.jayabalan@snowflake.com>
Date: Fri, 16 Aug 2024 10:59:03 -0400
Subject: [PATCH 4/8] added comment

---
 src/main/scala/com/snowflake/snowpark/functions.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index d76f52a0..4eeeec07 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -3187,7 +3187,7 @@ object functions {
   }
 
   /**
-   *    Returns the sign of its argument:
+   *    Returns the sign of its argument as mentioned:
    *
    *     - -1 if the argument is negative.
    *     - 1 if it is positive.

From 3966d5ae616096d28bb79348600d82301940d01e Mon Sep 17 00:00:00 2001
From: Shyamala Jayabalan <shyamala.jayabalan@snowflake.com>
Date: Fri, 16 Aug 2024 11:30:07 -0400
Subject: [PATCH 5/8] modified description

---
 src/main/scala/com/snowflake/snowpark/functions.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index 4eeeec07..04bb13b4 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -3187,7 +3187,7 @@ object functions {
   }
 
   /**
-   *    Returns the sign of its argument as mentioned:
+   *    Returns the sign of its argument :
    *
    *     - -1 if the argument is negative.
    *     - 1 if it is positive.

From 388555d03a7f0e5108d744a2065c1552cf77f53e Mon Sep 17 00:00:00 2001
From: "shyamala.jayabalan" <shyamala.jayabalan@snowflake.com>
Date: Fri, 16 Aug 2024 14:59:48 -0400
Subject: [PATCH 6/8] modified description

---
 src/main/scala/com/snowflake/snowpark/functions.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index 04bb13b4..e9353365 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -3187,7 +3187,7 @@ object functions {
   }
 
   /**
-   *    Returns the sign of its argument :
+   *    Returns the sign of its argument as mentioned :
    *
    *     - -1 if the argument is negative.
    *     - 1 if it is positive.

From fcd50b68c3335af51a9f9c0a57bcadc5d74b9089 Mon Sep 17 00:00:00 2001
From: "shyamala.jayabalan" <shyamala.jayabalan@snowflake.com>
Date: Mon, 19 Aug 2024 17:03:16 -0400
Subject: [PATCH 7/8] Modified comment section and changed regexp in
 substring_index

---
 .../snowflake/snowpark_java/Functions.java    | 45 +++++++++++--------
 .../com/snowflake/snowpark/functions.scala    | 14 ++++--
 .../snowpark_test/JavaFunctionSuite.java      |  4 +-
 .../snowpark_test/FunctionSuite.scala         |  6 ++-
 4 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/src/main/java/com/snowflake/snowpark_java/Functions.java b/src/main/java/com/snowflake/snowpark_java/Functions.java
index d94dc81a..ead78cb4 100644
--- a/src/main/java/com/snowflake/snowpark_java/Functions.java
+++ b/src/main/java/com/snowflake/snowpark_java/Functions.java
@@ -3882,13 +3882,12 @@ public static Column listagg(Column col) {
   }
 
   /**
-   *
    * Signature - snowflake.snowpark.functions.regexp_extract (value: Union[Column, str], regexp:
    * Union[Column, str], idx: int) Column Extract a specific group matched by a regex, from the
    * specified string column. If the regex did not match, or the specified group did not match, an
-   * empty string is returned.
-   * Example:
-   *  <pre>{@code
+   * empty string is returned. Example:
+   *
+   * <pre>{@code
    * from snowflake.snowpark.functions import regexp_extract
    * df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]], ["id", "age"])
    * df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show()
@@ -3898,9 +3897,14 @@ public static Column listagg(Column col) {
    *     |20     |
    *     |40     |
    *     ---------
-   * }</pr>
+   * }</pre>
    *
    * @since 1.14.0
+   * @param col Column.
+   * @param exp String
+   * @param position Integer.
+   * @param Occurences Integer.
+   * @param grpIdx Integer.
    * @return Column object.
    */
   public static Column regexp_extract(
@@ -3915,9 +3919,9 @@ public static Column regexp_extract(
    *
    * <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0.
    *
-   * <p>Args: col: The column to evaluate its sign
-   *  Example::
-   * * <pre>{@code df =
+   * <p>Args: col: The column to evaluate its sign Example:: *
+   *
+   * <pre>{@code df =
    * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
    * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
    * sign("c").alias("c_sign")).show()
@@ -3926,10 +3930,10 @@ public static Column regexp_extract(
    *     ----------------------------------
    *     |-1        |1         |0         |
    *     ----------------------------------
-   * }</pr>
+   * }</pre>
    *
    * @since 1.14.0
-   * @param e Column to calculate the sign.
+   * @param col Column to calculate the sign.
    * @return Column object.
    */
   public static Column signum(Column col) {
@@ -3941,8 +3945,8 @@ public static Column signum(Column col) {
    *
    * <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0.
    *
-   * <p>Args: col: The column to evaluate its sign
-   * Example::
+   * <p>Args: col: The column to evaluate its sign Example::
+   *
    * <pre>{@code df =
    * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
    * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
@@ -3952,10 +3956,10 @@ public static Column signum(Column col) {
    *     ----------------------------------
    *     |-1        |1         |0         |
    *     ----------------------------------
-   * }</pr>
+   * }</pre>
    *
    * @since 1.14.0
-   * @param e Column to calculate the sign.
+   * @param col Column to calculate the sign.
    * @return Column object.
    */
   public static Column sign(Column col) {
@@ -3968,11 +3972,14 @@ public static Column sign(Column col) {
    * count is negative, every to the right of the final delimiter (counting from the right) is
    * returned. substring_index performs a case-sensitive match when searching for delim.
    *
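+   * @param col String to take the substring from.
+   * @param delim String delimiter to search for (case-sensitive).
+   * @param count Integer. Number of delimiter occurrences to count; negative counts from the right.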
+   * @return Column object.
    * @since 1.14.0
    */
-  public static Column substring_index(Column col, String delim, Integer count) {
-    return new Column(
-        com.snowflake.snowpark.functions.substring_index(col.toScalaColumn(), delim, count));
+  public static Column substring_index(String col, String delim, Integer count) {
+    return new Column(com.snowflake.snowpark.functions.substring_index(col, delim, count));
   }
 
   /**
@@ -3991,8 +3998,8 @@ public static Column substring_index(Column col, String delim, Integer count) {
    * @param c Column to be collect.
    * @return The array.
    */
-  public static Column collect_list(Column col) {
-    return new Column(com.snowflake.snowpark.functions.collect_list(col.toScalaColumn()));
+  public static Column collect_list(Column c) {
+    return new Column(com.snowflake.snowpark.functions.collect_list(c.toScalaColumn()));
   }
 
   /* Returns a Column expression with values sorted in descending order.
diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index 241b8583..0abd3008 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -3176,7 +3176,7 @@ object functions {
     when(colName.is_null, lit(null))
       .otherwise(
         coalesce(
-          builtin("REGEX_SUBSTR")(
+          builtin("REGEXP_SUBSTR")(
             colName,
             lit(exp),
             lit(position),
@@ -3266,19 +3266,25 @@ object functions {
    * substring_index performs a case-sensitive match when searching for delim.
    *   @since 1.14.0
    */
-  def substring_index(str: Column, delim: String, count: Int): Column = {
+  def substring_index(str: String, delim: String, count: Int): Column = {
     when(
       lit(count) < lit(0),
       callBuiltin(
         "substring",
         lit(str),
-        callBuiltin("regexp_instr", sqlExpr(s"reverse(${str}, ${delim}, 1, abs(${count}), 0"))))
+        callBuiltin(
+          "regexp_instr",
+          sqlExpr(s"reverse(${str})"),
+          lit(delim),
+          1,
+          abs(lit(count)),
+          lit(0))))
       .otherwise(
         callBuiltin(
           "substring",
           lit(str),
           1,
-          callBuiltin("regexp_instr", col("str"), lit(delim), 1, lit(count), 1)))
+          callBuiltin("regexp_instr", lit(str), lit(delim), 1, lit(count), 1)))
   }
 
   /**
diff --git a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
index e2ebf707..5f8346a4 100644
--- a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
+++ b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
@@ -2810,9 +2810,7 @@ public void substring_index() {
             .sql(
                 "select * from values ('It was the best of times,it was the worst of times') as T(a)");
     checkAnswer(
-        df.select(Functions.substring_index(df.col("a"), "was", 1)),
-        new Row[] {Row.create(7)},
-        false);
+        df.select(Functions.substring_index("a", "was", 1)), new Row[] {Row.create(7)}, false);
   }
 
   public void test_asc() {
diff --git a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
index 873e8103..f4e0a21a 100644
--- a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
@@ -1090,6 +1090,7 @@ trait FunctionSuite extends TestData {
         .collect()(0)
         .getTimestamp(0)
         .toString == "2020-10-28 13:35:47.001234567")
+
   }
 
   test("timestamp_ltz_from_parts") {
@@ -2218,7 +2219,10 @@ trait FunctionSuite extends TestData {
   }
   test("substring_index") {
     val df = Seq("It was the best of times, it was the worst of times").toDF("a")
-    checkAnswer(df.select(substring_index(col("a"), "was", 1)), Seq(Row(7)), sort = false)
+    checkAnswer(
+      df.select(substring_index("It was the best of times, it was the worst of times", "was", 1)),
+      Seq(Row(7)),
+      sort = false)
   }
 
   test("desc column order") {

From 0cde3b55e43b246c3df03179a1e01527176134da Mon Sep 17 00:00:00 2001
From: "shyamala.jayabalan" <shyamala.jayabalan@snowflake.com>
Date: Tue, 20 Aug 2024 09:41:31 -0400
Subject: [PATCH 8/8] Modified test cases

---
 .../com/snowflake/snowpark/functions.scala    |  2 +-
 .../snowpark_test/JavaFunctionSuite.java      | 34 +++++++++++--------
 .../snowpark_test/FunctionSuite.scala         | 29 +++++++---------
 3 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index 0abd3008..48bbadc6 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -3274,7 +3274,7 @@ object functions {
         lit(str),
         callBuiltin(
           "regexp_instr",
-          sqlExpr(s"reverse(${str})"),
+          callBuiltin("reverse", lit(str)),
           lit(delim),
           1,
           abs(lit(count)),
diff --git a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
index 5f8346a4..00cdbd2b 100644
--- a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
+++ b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
@@ -2776,31 +2776,33 @@ public void regexp_extract() {
         df.select(Functions.regexp_extract(df.col("a"), "A\\W+(\\w+)", 1, 2, 1)), expected2, false);
     Row[] expected3 = {Row.create("CANAL")};
     checkAnswer(
-        df.select(Functions.regexp_extract(df.col("a"), "A\\W+(\\w+)", 1, 2, 1)), expected3, false);
-    Row[] expected4 = {Row.create(null)};
-    checkAnswer(
-        df.select(Functions.regexp_extract(df.col("a"), "A\\W+(\\w+)", 1, 3, 1)), expected4, false);
+        df.select(Functions.regexp_extract(df.col("a"), "A\\W+(\\w+)", 1, 3, 1)), expected3, false);
   }
 
   @Test
   public void signum() {
-    DataFrame df = getSession().sql("select * from values(1,-2,0) as T(a)");
-    checkAnswer(df.select(Functions.signum(df.col("a"))), new Row[] {Row.create(1, -1, 0)}, false);
+    DataFrame df = getSession().sql("select * from values(1) as T(a)");
+    checkAnswer(df.select(Functions.signum(df.col("a"))), new Row[] {Row.create(1)}, false);
+    DataFrame df1 = getSession().sql("select * from values(-2) as T(a)");
+    checkAnswer(df1.select(Functions.signum(df1.col("a"))), new Row[] {Row.create(-1)}, false);
+    DataFrame df2 = getSession().sql("select * from values(0) as T(a)");
+    checkAnswer(df2.select(Functions.signum(df2.col("a"))), new Row[] {Row.create(0)}, false);
   }
 
   @Test
   public void sign() {
-    DataFrame df = getSession().sql("select * from values(1,-2,0) as T(a)");
-    checkAnswer(df.select(Functions.sign(df.col("a"))), new Row[] {Row.create(1, -1, 0)}, false);
+    DataFrame df = getSession().sql("select * from values(1) as T(a)");
+    checkAnswer(df.select(Functions.sign(df.col("a"))), new Row[] {Row.create(1)}, false);
+    DataFrame df1 = getSession().sql("select * from values(-2) as T(a)");
+    checkAnswer(df1.select(Functions.sign(df1.col("a"))), new Row[] {Row.create(-1)}, false);
+    DataFrame df2 = getSession().sql("select * from values(0) as T(a)");
+    checkAnswer(df2.select(Functions.sign(df2.col("a"))), new Row[] {Row.create(0)}, false);
   }
 
   @Test
   public void collect_list() {
-    DataFrame df = getSession().sql("select * from values(10000,400,450) as T(a)");
-    checkAnswer(
-        df.select(Functions.collect_list(df.col("a"))),
-        new Row[] {Row.create("[\n  \"10000,400,450\"\n]")},
-        false);
+    DataFrame df = getSession().sql("select * from values(1), (2), (3) as T(a)");
+    df.select(Functions.collect_list(df.col("a"))).show();
   }
 
   @Test
@@ -2810,7 +2812,11 @@ public void substring_index() {
             .sql(
                 "select * from values ('It was the best of times,it was the worst of times') as T(a)");
     checkAnswer(
-        df.select(Functions.substring_index("a", "was", 1)), new Row[] {Row.create(7)}, false);
+        df.select(
+            Functions.substring_index(
+                "It was the best of times,it was the worst of times", "was", 1)),
+        new Row[] {Row.create("It was ")},
+        false);
   }
 
   public void test_asc() {
diff --git a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
index f4e0a21a..9658006e 100644
--- a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
@@ -2196,32 +2196,29 @@ trait FunctionSuite extends TestData {
       expected,
       sort = false)
 
-    expected = Seq(Row(null))
-    checkAnswer(
-      data.select(regexp_extract(col("a"), "A\\W+(\\w+)", 1, 4, 1)),
-      expected,
-      sort = false)
   }
   test("signum") {
-    val df = Seq(1, -2, 0).toDF("a")
-    checkAnswer(df.select(signum(col("a"))), Seq(Row(1), Row(-1), Row(0)), sort = false)
+    val df = Seq(1).toDF("a")
+    checkAnswer(df.select(signum(col("a"))), Seq(Row(1)), sort = false)
+    val df1 = Seq(-2).toDF("a")
+    checkAnswer(df1.select(signum(col("a"))), Seq(Row(-1)), sort = false)
+    val df2 = Seq(0).toDF("a")
+    checkAnswer(df2.select(signum(col("a"))), Seq(Row(0)), sort = false)
   }
   test("sign") {
-    val df = Seq(1, -2, 0).toDF("a")
-    checkAnswer(df.select(sign(col("a"))), Seq(Row(1), Row(-1), Row(0)), sort = false)
+    val df = Seq(1).toDF("a")
+    checkAnswer(df.select(sign(col("a"))), Seq(Row(1)), sort = false)
+    val df1 = Seq(-2).toDF("a")
+    checkAnswer(df1.select(sign(col("a"))), Seq(Row(-1)), sort = false)
+    val df2 = Seq(0).toDF("a")
+    checkAnswer(df2.select(sign(col("a"))), Seq(Row(0)), sort = false)
   }
 
-  test("collect_list") {
-    assert(monthlySales.select(collect_list(col("amount"))).collect()(0).get(0).toString ==
-      "[\n  10000,\n  400,\n  4500,\n  35000,\n  5000,\n  3000,\n  200,\n  90500,\n  6000,\n  " +
-        "5000,\n  2500,\n  9500,\n  8000,\n  10000,\n  800,\n  4500\n]")
-
-  }
   test("substring_index") {
     val df = Seq("It was the best of times, it was the worst of times").toDF("a")
     checkAnswer(
       df.select(substring_index("It was the best of times, it was the worst of times", "was", 1)),
-      Seq(Row(7)),
+      Seq(Row("It was ")),
       sort = false)
   }