From 2bc1f07193db7dd37d84e09f0bae2faf1db4c927 Mon Sep 17 00:00:00 2001
From: Bing Li <63471091+sfc-gh-bli@users.noreply.github.com>
Date: Tue, 19 Sep 2023 15:33:29 -0700
Subject: [PATCH] SNOW-897574 Support regexp_replace function (#52)

support regexp_replace function
---
 .../snowflake/snowpark_java/Functions.java    | 32 +++++++++++++++++++
 .../com/snowflake/snowpark/functions.scala    | 22 +++++++++++++
 .../snowpark_test/JavaFunctionSuite.java      | 13 ++++++++
 .../snowpark_test/FunctionSuite.scala         | 14 ++++++++
 4 files changed, 81 insertions(+)

diff --git a/src/main/java/com/snowflake/snowpark_java/Functions.java b/src/main/java/com/snowflake/snowpark_java/Functions.java
index aac59ec5..74cc39a8 100644
--- a/src/main/java/com/snowflake/snowpark_java/Functions.java
+++ b/src/main/java/com/snowflake/snowpark_java/Functions.java
@@ -2354,6 +2354,38 @@ public static Column regexp_count(Column strExpr, Column pattern) {
             strExpr.toScalaColumn(), pattern.toScalaColumn()));
   }
 
+  /**
+   * Returns the subject with the specified pattern (or all occurrences of the pattern) removed. If
+   * no matches are found, returns the original subject.
+   *
+   * @param strExpr The subject string column to search
+   * @param pattern The regular expression pattern column to match against the subject
+   * @return A column holding the subject with the matched text removed
+   * @since 1.9.0
+   */
+  public static Column regexp_replace(Column strExpr, Column pattern) {
+    return new Column(
+        com.snowflake.snowpark.functions.regexp_replace(
+            strExpr.toScalaColumn(), pattern.toScalaColumn()));
+  }
+
+  /**
+   * Returns the subject with the specified pattern (or all occurrences of the pattern)
+   * replaced by a replacement string. If no matches are found, returns the original
+   * subject.
+   *
+   * @param strExpr The subject string column to search
+   * @param pattern The regular expression pattern column to match against the subject
+   * @param replacement The column holding the text substituted for the matched text
+   * @return A column holding the subject with the matched text replaced
+   * @since 1.9.0
+   */
+  public static Column regexp_replace(Column strExpr, Column pattern, Column replacement) {
+    return new Column(
+        com.snowflake.snowpark.functions.regexp_replace(
+            strExpr.toScalaColumn(), pattern.toScalaColumn(), replacement.toScalaColumn()));
+  }
+
   /**
    * Removes all occurrences of a specified strExpr, and optionally replaces them with replacement.
    *
diff --git a/src/main/scala/com/snowflake/snowpark/functions.scala b/src/main/scala/com/snowflake/snowpark/functions.scala
index 0a10f7f8..49f38593 100644
--- a/src/main/scala/com/snowflake/snowpark/functions.scala
+++ b/src/main/scala/com/snowflake/snowpark/functions.scala
@@ -1796,6 +1796,28 @@ object functions {
   def regexp_count(strExpr: Column, pattern: Column): Column =
     builtin("regexp_count")(strExpr, pattern)
 
+  /**
+   * Returns the subject `strExpr` with the specified `pattern` (or all occurrences of the
+   * pattern) removed. If no matches are found, returns the original subject.
+   *
+   * @group str_func
+   * @since 1.9.0
+   */
+  def regexp_replace(strExpr: Column, pattern: Column): Column =
+    builtin("regexp_replace")(strExpr, pattern)
+
+  /**
+   * Returns the subject `strExpr` with the specified `pattern` (or all occurrences of the
+   * pattern) replaced by the `replacement` string. If no matches are found,
+   * returns the original subject.
+   *
+   * @group str_func
+   * @since 1.9.0
+   */
+  def regexp_replace(strExpr: Column, pattern: Column, replacement: Column): Column =
+    builtin("regexp_replace")(strExpr, pattern, replacement)
+
+
   /**
    * Removes all occurrences of a specified strExpr,
    * and optionally replaces them with replacement.
diff --git a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
index 1785ef14..f74dc440 100644
--- a/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
+++ b/src/test/java/com/snowflake/snowpark_test/JavaFunctionSuite.java
@@ -1447,6 +1447,19 @@ public void regexp_count() {
         df.select(Functions.regexp_count(df.col("a"), Functions.lit("a"))), expected1, false);
   }
 
+  @Test
+  public void regexp_replace() {
+    DataFrame df = getSession().sql("select * from values('cat'),('dog'),('mouse') as T(a)");
+    Column pattern = Functions.lit("^ca|^[md]o"); // a leading "ca", "mo" or "do"
+    Row[] expected = {Row.create("t"), Row.create("g"), Row.create("use")}; // match removed
+    checkAnswer(df.select(Functions.regexp_replace(df.col("a"), pattern)), expected, false);
+
+    Column replacement = Functions.lit("ch"); // substituted for the match
+    Row[] expected1 = {Row.create("cht"), Row.create("chg"), Row.create("chuse")};
+    checkAnswer(
+        df.select(Functions.regexp_replace(df.col("a"), pattern, replacement)), expected1, false);
+  }
+
   @Test
   public void replace() {
     DataFrame df = getSession().sql("select * from values('apple'),('banana'),('peach') as T(a)");
diff --git a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
index 8b8356f7..274e88f5 100644
--- a/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
+++ b/src/test/scala/com/snowflake/snowpark_test/FunctionSuite.scala
@@ -2144,6 +2144,20 @@ trait FunctionSuite extends TestData {
       Seq(Row("1'2'3'4'5")))
   }
 
+  test("regexp_replace") {
+    val data = Seq("cat", "dog", "mouse").toDF("a")
+    val pattern = lit("^ca|^[md]o") // a leading "ca", "mo" or "do"
+    var expected = Seq(Row("t"), Row("g"), Row("use")) // match removed
+    checkAnswer(data.select(regexp_replace(data("a"), pattern)), expected, sort = false)
+
+    val replacement = lit("ch") // substituted for the match
+    expected = Seq(Row("cht"), Row("chg"), Row("chuse"))
+    checkAnswer(
+      data.select(regexp_replace(data("a"), pattern, replacement)),
+      expected,
+      sort = false)
+  }
+
 }
 
 class EagerFunctionSuite extends FunctionSuite with EagerSession