From fcd50b68c3335af51a9f9c0a57bcadc5d74b9089 Mon Sep 17 00:00:00 2001
From: "shyamala.jayabalan" <shyamala.jayabalan@snowflake.com>
Date: Mon, 19 Aug 2024 17:03:16 -0400
Subject: [PATCH] Modified comment section and changed regexp in
 substring_index

---
 .../snowflake/snowpark_java/Functions.java    | 45 +++++++++++--------
 .../com/snowflake/snowpark/functions.scala    | 14 ++++--
 .../snowpark_test/JavaFunctionSuite.java      |  4 +-
 .../snowpark_test/FunctionSuite.scala         |  6 ++-
 4 files changed, 42 insertions(+), 27 deletions(-)
diff --git a/src/main/java/com/snowflake/snowpark_java/Functions.java b/src/main/java/com/snowflake/snowpark_java/Functions.java
index d94dc81a..ead78cb4 100644
--- a/src/main/java/com/snowflake/snowpark_java/Functions.java
+++ b/src/main/java/com/snowflake/snowpark_java/Functions.java
@@ -3882,13 +3882,12 @@ public static Column listagg(Column col) {
   }
 
   /**
-   *
    * Signature - snowflake.snowpark.functions.regexp_extract (value: Union[Column, str], regexp:
    * Union[Column, str], idx: int) Column Extract a specific group matched by a regex, from the
    * specified string column. If the regex did not match, or the specified group did not match, an
-   * empty string is returned.
-   * Example:
-   *  <pre>{@code
+   * empty string is returned. Example:
+   *
+   * <pre>{@code
    * from snowflake.snowpark.functions import regexp_extract
    * df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]], ["id", "age"])
    * df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show()
@@ -3898,9 +3897,14 @@ public static Column listagg(Column col) {
    *     |20     |
    *     |40     |
    *     ---------
-   * }</pr>
+   * }</pre>
    *
    * @since 1.14.0
+   * @param col Column.
+   * @param exp String
+   * @param position Integer.
+   * @param Occurences Integer.
+   * @param grpIdx Integer.
    * @return Column object.
    */
   public static Column regexp_extract(
@@ -3915,9 +3919,9 @@ public static Column regexp_extract(
    *
    * <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0.
    *
-   * <p>Args: col: The column to evaluate its sign
-   *  Example::
-   * * <pre>{@code df =
+   * <p>Args: col: The column to evaluate its sign. Example:
+   *
+   * <pre>{@code df =
    * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
    * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
    * sign("c").alias("c_sign")).show()
@@ -3926,10 +3930,10 @@ public static Column regexp_extract(
    *     ----------------------------------
    *     |-1        |1         |0         |
    *     ----------------------------------
-   * }</pr>
+   * }</pre>
    *
    * @since 1.14.0
-   * @param e Column to calculate the sign.
+   * @param col Column to calculate the sign.
    * @return Column object.
    */
   public static Column signum(Column col) {
@@ -3941,8 +3945,8 @@ public static Column signum(Column col) {
    *
    * <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0.
    *
-   * <p>Args: col: The column to evaluate its sign
-   * Example::
+   * <p>Args: col: The column to evaluate its sign. Example:
+   *
    * <pre>{@code df =
    * session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
    * df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
@@ -3952,10 +3956,10 @@ public static Column signum(Column col) {
    *     ----------------------------------
    *     |-1        |1         |0         |
    *     ----------------------------------
-   * }</pr>
+   * }</pre>
    *
    * @since 1.14.0
-   * @param e Column to calculate the sign.
+   * @param col Column to calculate the sign.
    * @return Column object.
    */
   public static Column sign(Column col) {
@@ -3968,11 +3972,14 @@ public static Column sign(Column col) {
    * count is negative, every to the right of the final delimiter (counting from the right) is
    * returned. substring_index performs a case-sensitive match when searching for delim.
    *
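+   * @param col String to extract the substring from.
+   * @param delim Delimiter to search for; the match is case-sensitive.
+   * @param count Occurrences of delim to count; negative counts from the right.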
+   * @return Column object.
    * @since 1.14.0
    */
-  public static Column substring_index(Column col, String delim, Integer count) {
-    return new Column(
-        com.snowflake.snowpark.functions.substring_index(col.toScalaColumn(), delim, count));
+  public static Column substring_index(String col, String delim, Integer count) {
+    return new Column(com.snowflake.snowpark.functions.substring_index(col, delim, count));
   }
 
   /**
@@ -3991,8 +3998,8 @@ public static Column substring_index(Column col, String delim, Integer count) {
    * @param c Column to be collect.
    * @return The array.
    */
-  public static Column collect_list(Column col) {
-    return new Column(com.snowflake.snowpark.functions.collect_list(col.toScalaColumn()));
+  public static Column collect_list(Column c) {
+    return new Column(com.snowflake.snowpark.functions.collect_list(c.toScalaColumn()));
   }
 
   /* Returns a Column expression with values sorted in descending order.
diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index 241b8583..0abd3008 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -3176,7 +3176,7 @@ object functions {
     when(colName.is_null, lit(null))
       .otherwise(
         coalesce(
-          builtin("REGEX_SUBSTR")(
+          builtin("REGEXP_SUBSTR")(
             colName,
             lit(exp),
             lit(position),
@@ -3266,19 +3266,25 @@ object functions {
    * substring_index performs a case-sensitive match when searching for delim.
    *   @since 1.14.0
    */
-  def substring_index(str: Column, delim: String, count: Int): Column = {
+  def substring_index(str: String, delim: String, count: Int): Column = {
     when(
       lit(count) < lit(0),
       callBuiltin(
         "substring",
         lit(str),
-        callBuiltin("regexp_instr", sqlExpr(s"reverse(${str}, ${delim}, 1, abs(${count}), 0"))))
+        callBuiltin(
+          "regexp_instr",
+          callBuiltin("reverse", lit(str)), // bind str as a literal, never as raw SQL text
+          lit(delim),
+          1,
+          abs(lit(count)),
+          lit(0))))
       .otherwise(
         callBuiltin(
           "substring",
           lit(str),
           1,
-          callBuiltin("regexp_instr", col("str"), lit(delim), 1, lit(count), 1)))
+          callBuiltin("regexp_instr", lit(str), lit(delim), 1, lit(count), 1)))
   }
 
   /**
diff --git a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
index e2ebf707..5f8346a4 100644
--- a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
+++ b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
@@ -2810,9 +2810,7 @@ public void substring_index() {
             .sql(
                 "select * from values ('It was the best of times,it was the worst of times') as T(a)");
     checkAnswer(
-        df.select(Functions.substring_index(df.col("a"), "was", 1)),
-        new Row[] {Row.create(7)},
-        false);
+        df.select(Functions.substring_index("a", "was", 1)), new Row[] {Row.create(7)}, false);
   }
 
   public void test_asc() {
diff --git a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
index 873e8103..f4e0a21a 100644
--- a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
@@ -1090,6 +1090,7 @@ trait FunctionSuite extends TestData {
         .collect()(0)
         .getTimestamp(0)
         .toString == "2020-10-28 13:35:47.001234567")
+
   }
 
   test("timestamp_ltz_from_parts") {
@@ -2218,7 +2219,10 @@ trait FunctionSuite extends TestData {
   }
   test("substring_index") {
     val df = Seq("It was the best of times, it was the worst of times").toDF("a")
-    checkAnswer(df.select(substring_index(col("a"), "was", 1)), Seq(Row(7)), sort = false)
+    checkAnswer(
+      df.select(substring_index("It was the best of times, it was the worst of times", "was", 1)),
+      Seq(Row(7)),
+      sort = false)
   }
 
   test("desc column order") {