-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SNOW-802269 Add regextract signum subindex collectlist functions #142
Changes from 2 commits
d14ff9f
cb7e041
23643c3
126fda6
5d4d8ce
3966d5a
388555d
55dd239
fcd50b6
0cde3b5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3882,7 +3882,120 @@ public static Column listagg(Column col) { | |
} | ||
|
||
/** | ||
* Extracts a specific group matched by a regular expression from the specified string column. | ||
* If the regex did not match, or the specified group did not match, an empty string is | ||
* returned. | ||
* | ||
* <p>Corresponding Snowpark Python signature: snowflake.snowpark.functions.regexp_extract | ||
* (value: Union[Column, str], regexp: Union[Column, str], idx: int) Column | ||
* | ||
* Example: | ||
* <pre>{@code | ||
* from snowflake.snowpark.functions import regexp_extract | ||
* df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]], ["id", "age"]) | ||
* df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show() | ||
* --------- | ||
* |"RES" | | ||
* --------- | ||
* |20 | | ||
* |40 | | ||
* --------- | ||
* }</pre> | ||
* | ||
* @since 1.12.1 | ||
* @return Column object. | ||
*/ | ||
public static Column regexp_extract( | ||
Column col, String exp, Integer position, Integer Occurences, Integer grpIdx) { | ||
return new Column( | ||
com.snowflake.snowpark.functions.regexp_extract( | ||
col.toScalaColumn(), exp, position, Occurences, grpIdx)); | ||
} | ||
|
||
/** | ||
* Returns the sign of its argument: | ||
* | ||
* <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0. | ||
* | ||
* <p>Args: col: The column to evaluate its sign | ||
* Example:: | ||
* <pre>{@code df = | ||
* session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>> | ||
* df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"), | ||
* sign("c").alias("c_sign")).show() | ||
* ---------------------------------- | ||
* |"A_SIGN" |"B_SIGN" |"C_SIGN" | | ||
* ---------------------------------- | ||
* |-1 |1 |0 | | ||
* ---------------------------------- | ||
* }</pre> | ||
* | ||
* @since 1.12.1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* @param col Column to calculate the sign. | ||
* @return Column object. | ||
*/ | ||
public static Column signum(Column col) { | ||
return new Column(com.snowflake.snowpark.functions.signum(col.toScalaColumn())); | ||
} | ||
|
||
/** | ||
* Returns the sign of its argument: | ||
* | ||
* <p>- -1 if the argument is negative. - 1 if it is positive. - 0 if it is 0. | ||
* | ||
* <p>Args: col: The column to evaluate its sign | ||
* Example:: | ||
* <pre>{@code df = | ||
* session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) >>> | ||
* df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"), | ||
* sign("c").alias("c_sign")).show() | ||
* ---------------------------------- | ||
* |"A_SIGN" |"B_SIGN" |"C_SIGN" | | ||
* ---------------------------------- | ||
* |-1 |1 |0 | | ||
* ---------------------------------- | ||
* }</pre> | ||
* | ||
* @since 1.12.1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* @param col Column to calculate the sign. | ||
* @return Column object. | ||
*/ | ||
public static Column sign(Column col) { | ||
return new Column(com.snowflake.snowpark.functions.sign(col.toScalaColumn())); | ||
} | ||
|
||
/** | ||
* Returns the substring from string str before count occurrences of the delimiter delim. If count | ||
* is positive, everything to the left of the final delimiter (counting from the left) is | ||
* returned. If count is negative, everything to the right of the final delimiter (counting from | ||
* the right) is | ||
* returned. substring_index performs a case-sensitive match when searching for delim. | ||
* | ||
* @since 1.12.1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
*/ | ||
public static Column substring_index(Column col, String delim, Integer count) { | ||
return new Column( | ||
com.snowflake.snowpark.functions.substring_index(col.toScalaColumn(), delim, count)); | ||
} | ||
|
||
/** | ||
* Returns the input values, pivoted into an ARRAY. If the input is empty, an empty ARRAY is | ||
* returned. | ||
* | ||
* <p>Example:: | ||
* | ||
* <pre>{@code | ||
* df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"]) | ||
* df.select(array_agg("a", True).alias("result")).show() | ||
* "RESULT" [ 1, 2, 3 ] | ||
* }</pre> | ||
* | ||
* @since 1.10.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* @param col Column to be collected. | ||
* @return The array. | ||
*/ | ||
public static Column collect_list(Column col) { | ||
return new Column(com.snowflake.snowpark.functions.collect_list(col.toScalaColumn())); | ||
} | ||
|
||
/* Returns a Column expression with values sorted in descending order. | ||
* | ||
* <p>Example: order column values in descending | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3142,7 +3142,193 @@ object functions { | |
def listagg(col: Column): Column = { | ||
  // Single-argument convenience form: empty delimiter, duplicates kept. | ||
  listagg(col, "", isDistinct = false) | ||
} | ||
|
||
/** | ||
* Extracts a specific group matched by a regular expression from a string column. | ||
|
||
* Signature - snowflake.snowpark.functions.regexp_extract | ||
* (value: Union[Column, str], regexp: Union[Column, str], idx: int) | ||
* Column | ||
* Extract a specific group matched by a regex, from the specified string | ||
* column. If the regex did not match, or the specified group did not match, | ||
* an empty string is returned. | ||
* <pre>Example: | ||
* from snowflake.snowpark.functions import regexp_extract | ||
* df = session.createDataFrame([["id_20_30", 10], ["id_40_50", 30]], | ||
* ["id", "age"]) | ||
* df.select(regexp_extract("id", r"(\d+)", 1).alias("RES")).show() | ||
*</pre> | ||
*<pre> | ||
* --------- | ||
* |"RES" | | ||
* --------- | ||
* |20 | | ||
* |40 | | ||
* --------- | ||
*</pre> | ||
* Note: non-greedy tokens are not supported. | ||
* @since 1.12.1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* @return Column object. | ||
*/ | ||
def regexp_extract( | ||
    colName: Column, | ||
    exp: String, | ||
    position: Int, | ||
    Occurences: Int, | ||
    grpIdx: Int): Column = { | ||
  // Snowflake's built-in is REGEXP_SUBSTR; "REGEX_SUBSTR" does not exist and fails at query time. | ||
  // Regex parameters "ce": "c" = case-sensitive matching, "e" = extract the sub-match | ||
  // selected by grpIdx. | ||
  val matchedGroup = builtin("REGEXP_SUBSTR")( | ||
    colName, | ||
    lit(exp), | ||
    lit(position), | ||
    lit(Occurences), | ||
    lit("ce"), | ||
    lit(grpIdx)) | ||
  // A NULL input stays NULL; a failed match (NULL from REGEXP_SUBSTR) becomes an empty string. | ||
  when(colName.is_null, lit(null)) | ||
    .otherwise(coalesce(matchedGroup, lit(""))) | ||
} | ||
|
||
/** | ||
* Returns the sign of its argument: | ||
* | ||
* - -1 if the argument is negative. | ||
* - 1 if it is positive. | ||
* - 0 if it is 0. | ||
* | ||
* Args: | ||
* col: The column to evaluate its sign | ||
*<pre> | ||
* Example:: | ||
* >>> df = session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) | ||
* >>> df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"), | ||
* sign("c").alias("c_sign")).show() | ||
* ---------------------------------- | ||
* |"A_SIGN" |"B_SIGN" |"C_SIGN" | | ||
* ---------------------------------- | ||
* |-1 |1 |0 | | ||
* ---------------------------------- | ||
* </pre> | ||
* @since 1.12.1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* @param colName Column to calculate the sign. | ||
* @return Column object. | ||
*/ | ||
def sign(colName: Column): Column = { | ||
  // Look up the SQL SIGN built-in, then apply it to the column. | ||
  val signFunction = builtin("SIGN") | ||
  signFunction(colName) | ||
} | ||
|
||
/** | ||
* Returns the sign of its argument: | ||
* | ||
* - -1 if the argument is negative. | ||
* - 1 if it is positive. | ||
* - 0 if it is 0. | ||
* | ||
* Args: | ||
* col: The column to evaluate its sign | ||
*<pre> | ||
* Example:: | ||
* >>> df = session.create_dataframe([(-2, 2, 0)], ["a", "b", "c"]) | ||
* >>> df.select(sign("a").alias("a_sign"), sign("b").alias("b_sign"), | ||
* sign("c").alias("c_sign")).show() | ||
* ---------------------------------- | ||
* |"A_SIGN" |"B_SIGN" |"C_SIGN" | | ||
* ---------------------------------- | ||
* |-1 |1 |0 | | ||
* ---------------------------------- | ||
* </pre> | ||
* @since 1.12.1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* @param colName Column to calculate the sign. | ||
* @return Column object. | ||
*/ | ||
def signum(colName: Column): Column = { | ||
  // signum is an alias: it maps to the same SQL SIGN built-in as sign. | ||
  val signFunction = builtin("SIGN") | ||
  signFunction(colName) | ||
} | ||
|
||
/** | ||
* Returns the sign of the given column. Returns either 1 for positive, | ||
* 0 for 0 or | ||
* NaN, -1 for negative and null for null. | ||
* NOTE: if string values are provided, Snowflake will attempt to cast them. | ||
* If it casts correctly, returns the calculation, | ||
* if not an error will be thrown | ||
* @since 1.12.1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* @param columnName Name of the column to calculate the sign. | ||
* @return Column object. | ||
*/ | ||
def signum(columnName: String): Column = { | ||
  // Resolve the name to a Column, then reuse the Column-based overload. | ||
  val target = col(columnName) | ||
  signum(target) | ||
} | ||
|
||
/** | ||
* Returns the substring from string str before count occurrences | ||
* of the delimiter delim. If count is positive, | ||
* everything to the left of the final delimiter (counting from the left) | ||
* is returned. If count is negative, everything to the right of the | ||
* final delimiter (counting from the right) is returned. | ||
* substring_index performs a case-sensitive match when searching for delim. | ||
* @since 1.12.1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
*/ | ||
def substring_index(str: Column, delim: String, count: Int): Column = { | ||
  // Fixes relative to the previous version: | ||
  //  - the negative branch built a malformed SQL fragment ("reverse(" with five arguments and | ||
  //    unbalanced parentheses) and passed it as the only argument to regexp_instr; the | ||
  //    arguments are now passed to regexp_instr individually; | ||
  //  - the positive branch looked up a column literally named "str" instead of using the | ||
  //    `str` parameter. | ||
  // NOTE(review): delim is handed to REGEXP_INSTR as a regular expression, so delimiters that | ||
  // contain regex metacharacters (for example ".") are presumably not matched literally, and | ||
  // the offsets used below should be confirmed against the documented substring_index | ||
  // semantics with tests - TODO confirm. | ||
  when( | ||
    lit(count) < lit(0), | ||
    callBuiltin( | ||
      "substring", | ||
      lit(str), | ||
      callBuiltin( | ||
        "regexp_instr", | ||
        callBuiltin("reverse", lit(str)), | ||
        lit(delim), | ||
        1, | ||
        scala.math.abs(count), | ||
        0))) | ||
    .otherwise( | ||
      callBuiltin( | ||
        "substring", | ||
        lit(str), | ||
        1, | ||
        callBuiltin("regexp_instr", lit(str), lit(delim), 1, lit(count), 1))) | ||
} | ||
|
||
/** | ||
* | ||
* Returns the input values, pivoted into an ARRAY. If the input is empty, an empty | ||
* ARRAY is returned. | ||
*<pre> | ||
* Example:: | ||
* >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"]) | ||
* >>> df.select(array_agg("a", True).alias("result")).show() | ||
* ------------ | ||
* |"RESULT" | | ||
* ------------ | ||
* |[ | | ||
* | 1, | | ||
* | 2, | | ||
* | 3 | | ||
* |] | | ||
* ------------ | ||
* </pre> | ||
* @since 1.10.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* @param c Column to be collected. | ||
* @return The array. | ||
*/ | ||
def collect_list(c: Column): Column = { | ||
  // collect_list delegates directly to array_agg. | ||
  array_agg(c) | ||
} | ||
|
||
/** | ||
* | ||
* Returns the input values, pivoted into an ARRAY. If the input is empty, an empty | ||
* ARRAY is returned. | ||
* | ||
* Example:: | ||
* >>> df = session.create_dataframe([[1], [2], [3], [1]], schema=["a"]) | ||
* >>> df.select(array_agg("a", True).alias("result")).show() | ||
* ------------ | ||
* |"RESULT" | | ||
* ------------ | ||
* |[ | | ||
* | 1, | | ||
* | 2, | | ||
* | 3 | | ||
* |] | | ||
* ------------ | ||
* @since 1.10.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* @param s Column name to be collected. | ||
* @return The array. | ||
*/ | ||
def collect_list(s: String): Column = { | ||
  // Resolve the column name first, then aggregate it with array_agg. | ||
  val source = col(s) | ||
  array_agg(source) | ||
} | ||
|
||
/* Returns a Column expression with values sorted in descending order. | ||
* Example: | ||
* {{{ | ||
* val df = session.createDataFrame(Seq(1, 2, 3)).toDF("id") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@since 1.14.0