Skip to content

Commit

Permalink
Modified comment section and changed regexp in substring_index
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-sjayabalan committed Aug 19, 2024
1 parent 55dd239 commit fcd50b6
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 27 deletions.
45 changes: 26 additions & 19 deletions src/main/java/com/snowflake/snowpark_java/Functions.java
Original file line number Diff line number Diff line change
Expand Up @@ -3882,13 +3882,12 @@ public static Column listagg(Column col) {
}

/**
*
* Signature - snowflake.snowpark.functions.regexp_extract (value: Union[Column, str], regexp:
* Union[Column, str], idx: int) Column Extract a specific group matched by a regex, from the
* specified string column. If the regex did not match, or the specified group did not match, an
* empty string is returned.
* Example:
* <pre>{@code
* empty string is returned. Example:
*
* <pre>{@code
* from snowflake.snowpark.functions import regexp_extract
* df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]], ["id", "age"])
* df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show()
Expand All @@ -3898,9 +3897,14 @@ public static Column listagg(Column col) {
* |20 |
* |40 |
* ---------
* }</pr>
* }</pre>
*
* @since 1.14.0
* @param col Column.
* @param exp String
* @param position Integer.
* @param Occurences Integer.
* @param grpIdx Integer.
* @return Column object.
*/
public static Column regexp_extract(
Expand All @@ -3915,9 +3919,9 @@ public static Column regexp_extract(
*
* <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0.
*
* <p>Args: col: The column to evaluate its sign
* Example::
* * <pre>{@code df =
* <p>Args: col: The column to evaluate its sign. Example:
*
* <pre>{@code df =
* session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
* df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
* sign("c").alias("c_sign")).show()
Expand All @@ -3926,10 +3930,10 @@ public static Column regexp_extract(
* ----------------------------------
* |-1 |1 |0 |
* ----------------------------------
* }</pr>
* }</pre>
*
* @since 1.14.0
* @param e Column to calculate the sign.
* @param col Column to calculate the sign.
* @return Column object.
*/
public static Column signum(Column col) {
Expand All @@ -3941,8 +3945,8 @@ public static Column signum(Column col) {
*
* <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0.
*
* <p>Args: col: The column to evaluate its sign
* Example::
* <p>Args: col: The column to evaluate its sign. Example:
*
* <pre>{@code df =
* session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>>
* df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"),
Expand All @@ -3952,10 +3956,10 @@ public static Column signum(Column col) {
* ----------------------------------
* |-1 |1 |0 |
* ----------------------------------
* }</pr>
* }</pre>
*
* @since 1.14.0
* @param e Column to calculate the sign.
* @param col Column to calculate the sign.
* @return Column object.
*/
public static Column sign(Column col) {
Expand All @@ -3968,11 +3972,14 @@ public static Column sign(Column col) {
* count is negative, everything to the right of the final delimiter (counting from the right) is
* returned. substring_index performs a case-sensitive match when searching for delim.
*
* @param col String.
* @param delim String
* @param count Integer.
* @return Column object.
* @since 1.14.0
*/
public static Column substring_index(Column col, String delim, Integer count) {
return new Column(
com.snowflake.snowpark.functions.substring_index(col.toScalaColumn(), delim, count));
public static Column substring_index(String col, String delim, Integer count) {
  // Delegate to the Scala implementation, then wrap its result in the Java Column type.
  com.snowflake.snowpark.Column scalaResult =
      com.snowflake.snowpark.functions.substring_index(col, delim, count);
  return new Column(scalaResult);
}

/**
Expand All @@ -3991,8 +3998,8 @@ public static Column substring_index(Column col, String delim, Integer count) {
* @param c Column to be collected.
* @return The array.
*/
public static Column collect_list(Column col) {
return new Column(com.snowflake.snowpark.functions.collect_list(col.toScalaColumn()));
public static Column collect_list(Column c) {
  // Unwrap to the Scala column, delegate to the Scala collect_list, and re-wrap for Java callers.
  com.snowflake.snowpark.Column scalaInput = c.toScalaColumn();
  return new Column(com.snowflake.snowpark.functions.collect_list(scalaInput));
}

/* Returns a Column expression with values sorted in descending order.
Expand Down
14 changes: 10 additions & 4 deletions src/main/scala/com/snowflake/snowpark/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3176,7 +3176,7 @@ object functions {
when(colName.is_null, lit(null))
.otherwise(
coalesce(
builtin("REGEX_SUBSTR")(
builtin("REGEXP_SUBSTR")(
colName,
lit(exp),
lit(position),
Expand Down Expand Up @@ -3266,19 +3266,25 @@ object functions {
* substring_index performs a case-sensitive match when searching for delim.
* @since 1.14.0
*/
def substring_index(str: Column, delim: String, count: Int): Column = {
def substring_index(str: String, delim: String, count: Int): Column = {
// Builds when(count < 0, <branch A>).otherwise(<branch B>) out of "substring" and
// "regexp_instr" builtin calls. Branch A searches for `delim` in the reversed input;
// branch B takes a substring of the input starting at position 1.
// NOTE(review): this span is taken from a commit diff; presumably each single-line
// regexp_instr call is the superseded version of the call that follows it — confirm.
when(
lit(count) < lit(0),
callBuiltin(
"substring",
lit(str),
callBuiltin("regexp_instr", sqlExpr(s"reverse(${str}, ${delim}, 1, abs(${count}), 0"))))
callBuiltin(
"regexp_instr",
// NOTE(review): `str` is spliced into the SQL text unquoted here, whereas every other
// use in this function passes it through lit(...). Confirm this renders valid SQL
// for arbitrary input strings.
sqlExpr(s"reverse(${str})"),
lit(delim),
1,
abs(lit(count)),
lit(0))))
.otherwise(
callBuiltin(
"substring",
lit(str),
1,
callBuiltin("regexp_instr", col("str"), lit(delim), 1, lit(count), 1)))
callBuiltin("regexp_instr", lit(str), lit(delim), 1, lit(count), 1)))
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2810,9 +2810,7 @@ public void substring_index() {
.sql(
"select * from values ('It was the best of times,it was the worst of times') as T(a)");
checkAnswer(
df.select(Functions.substring_index(df.col("a"), "was", 1)),
new Row[] {Row.create(7)},
false);
df.select(Functions.substring_index("a", "was", 1)), new Row[] {Row.create(7)}, false);
}

public void test_asc() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,7 @@ trait FunctionSuite extends TestData {
.collect()(0)
.getTimestamp(0)
.toString == "2020-10-28 13:35:47.001234567")

}

test("timestamp_ltz_from_parts") {
Expand Down Expand Up @@ -2218,7 +2219,10 @@ trait FunctionSuite extends TestData {
}
test("substring_index") {
// Single-row DataFrame whose only column is named "a".
val df = Seq("It was the best of times, it was the worst of times").toDF("a")
checkAnswer(df.select(substring_index(col("a"), "was", 1)), Seq(Row(7)), sort = false)
// NOTE(review): the call below passes the sentence itself rather than a reference to
// column "a" of df — confirm that is intended.
// NOTE(review): the expected row holds the integer 7; verify this against what
// substring_index is meant to return.
checkAnswer(
df.select(substring_index("It was the best of times, it was the worst of times", "was", 1)),
Seq(Row(7)),
sort = false)
}

test("desc column order") {
Expand Down

0 comments on commit fcd50b6

Please sign in to comment.