From 354e84302edc4cc9961566ee6a526a17564d6c9b Mon Sep 17 00:00:00 2001 From: Margarit Hakobyan Date: Tue, 6 Dec 2022 10:09:37 -0800 Subject: [PATCH] Add position() function to V2 engine (#1121) POSITION function is a synonym to LOCATE. It's syntax is POSITION( IN ) Signed-off-by: Margarit Hakobyan --- .../org/opensearch/sql/expression/DSL.java | 4 + .../function/BuiltinFunctionName.java | 25 ++-- .../sql/expression/text/TextFunction.java | 37 +++-- .../sql/expression/text/TextFunctionTest.java | 20 +++ docs/user/dql/functions.rst | 25 ++++ .../sql/sql/PositionFunctionIT.java | 129 ++++++++++++++++++ sql/src/main/antlr/OpenSearchSQLLexer.g4 | 1 + sql/src/main/antlr/OpenSearchSQLParser.g4 | 5 + .../sql/sql/parser/AstExpressionBuilder.java | 10 ++ .../sql/parser/AstExpressionBuilderTest.java | 8 ++ 10 files changed, 241 insertions(+), 23 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/sql/PositionFunctionIT.java diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 710e7f1114..f5fd1e3315 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -230,6 +230,10 @@ public static FunctionExpression cbrt(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.CBRT, expressions); } + public static FunctionExpression position(Expression... expressions) { + return compile(FunctionProperties.None, BuiltinFunctionName.POSITION, expressions); + } + public static FunctionExpression truncate(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.TRUNCATE, expressions); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index b3af0181f7..b09f3b0c74 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -156,23 +156,24 @@ public enum BuiltinFunctionName { /** * Text Functions. */ - SUBSTR(FunctionName.of("substr")), - SUBSTRING(FunctionName.of("substring")), - RTRIM(FunctionName.of("rtrim")), - LTRIM(FunctionName.of("ltrim")), - TRIM(FunctionName.of("trim")), - UPPER(FunctionName.of("upper")), - LOWER(FunctionName.of("lower")), - REGEXP(FunctionName.of("regexp")), + ASCII(FunctionName.of("ascii")), CONCAT(FunctionName.of("concat")), CONCAT_WS(FunctionName.of("concat_ws")), - LENGTH(FunctionName.of("length")), - STRCMP(FunctionName.of("strcmp")), - RIGHT(FunctionName.of("right")), LEFT(FunctionName.of("left")), - ASCII(FunctionName.of("ascii")), + LENGTH(FunctionName.of("length")), LOCATE(FunctionName.of("locate")), + LOWER(FunctionName.of("lower")), + LTRIM(FunctionName.of("ltrim")), + POSITION(FunctionName.of("position")), + REGEXP(FunctionName.of("regexp")), REPLACE(FunctionName.of("replace")), + RIGHT(FunctionName.of("right")), + RTRIM(FunctionName.of("rtrim")), + STRCMP(FunctionName.of("strcmp")), + SUBSTR(FunctionName.of("substr")), + SUBSTRING(FunctionName.of("substring")), + TRIM(FunctionName.of("trim")), + UPPER(FunctionName.of("upper")), /** * NULL Test. diff --git a/core/src/main/java/org/opensearch/sql/expression/text/TextFunction.java b/core/src/main/java/org/opensearch/sql/expression/text/TextFunction.java index 8035728d19..5915700bf1 100644 --- a/core/src/main/java/org/opensearch/sql/expression/text/TextFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/text/TextFunction.java @@ -39,22 +39,23 @@ public class TextFunction { * @param repository {@link BuiltinFunctionRepository}. */ public void register(BuiltinFunctionRepository repository) { - repository.register(substr()); - repository.register(substring()); - repository.register(ltrim()); - repository.register(rtrim()); - repository.register(trim()); - repository.register(lower()); - repository.register(upper()); + repository.register(ascii()); repository.register(concat()); repository.register(concat_ws()); - repository.register(length()); - repository.register(strcmp()); - repository.register(right()); repository.register(left()); - repository.register(ascii()); + repository.register(length()); repository.register(locate()); + repository.register(lower()); + repository.register(ltrim()); + repository.register(position()); repository.register(replace()); + repository.register(right()); + repository.register(rtrim()); + repository.register(strcmp()); + repository.register(substr()); + repository.register(substring()); + repository.register(trim()); + repository.register(upper()); } /** @@ -241,6 +242,20 @@ private DefaultFunctionResolver locate() { TextFunction::exprLocate), INTEGER, STRING, STRING, INTEGER)); } + /** + * Returns the position of the first occurrence of a substring in a string starting from 1. + * Returns 0 if substring is not in string. + * Returns NULL if any argument is NULL. + * Supports following signature: + * (STRING IN STRING) -> INTEGER + */ + private DefaultFunctionResolver position() { + return define(BuiltinFunctionName.POSITION.getName(), + impl(nullMissingHandling( + (SerializableBiFunction) + TextFunction::exprLocate), INTEGER, STRING, STRING)); + } + /** * REPLACE(str, from_str, to_str) returns the string str with all occurrences of * the string from_str replaced by the string to_str. diff --git a/core/src/test/java/org/opensearch/sql/expression/text/TextFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/text/TextFunctionTest.java index 502fe70ec8..5e32678b94 100644 --- a/core/src/test/java/org/opensearch/sql/expression/text/TextFunctionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/text/TextFunctionTest.java @@ -377,6 +377,26 @@ void locate() { DSL.locate(missingRef, DSL.literal("hello"), DSL.literal(1)))); } + @Test + void position() { + FunctionExpression expression = DSL.position( + DSL.literal("world"), + DSL.literal("helloworldworld")); + assertEquals(INTEGER, expression.type()); + assertEquals(6, eval(expression).integerValue()); + + expression = DSL.position( + DSL.literal("abc"), + DSL.literal("hello world")); + assertEquals(INTEGER, expression.type()); + assertEquals(0, eval(expression).integerValue()); + + when(nullRef.type()).thenReturn(STRING); + assertEquals(nullValue(), eval(DSL.position(nullRef, DSL.literal("hello")))); + when(missingRef.type()).thenReturn(STRING); + assertEquals(missingValue(), eval(DSL.position(missingRef, DSL.literal("hello")))); + } + @Test void replace() { FunctionExpression expression = DSL.replace( diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index e7360d52cc..c4c2fa988b 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -2420,6 +2420,31 @@ Example:: +---------------------+---------------------+ +POSITION +------ + +Description +>>>>>>>>>>> + +Usage: The syntax POSITION(substr IN str) returns the position of the first occurrence of substring substr in string str. Returns 0 if substr is not in str. Returns NULL if any argument is NULL. + +Argument type: STRING, STRING + +Return type integer: + +(STRING IN STRING) -> INTEGER + +Example:: + + os> SELECT POSITION('world' IN 'helloworld'), POSITION('invalid' IN 'helloworld'); + fetched rows / total rows = 1/1 + +-------------------------------------+---------------------------------------+ + | POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') | + |-------------------------------------+---------------------------------------| + | 6 | 0 | + +-------------------------------------+---------------------------------------+ + + REPLACE ------- diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/PositionFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/PositionFunctionIT.java new file mode 100644 index 0000000000..f51a3a0977 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/PositionFunctionIT.java @@ -0,0 +1,129 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.legacy.TestsConstants; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +public class PositionFunctionIT extends SQLIntegTestCase { + + @Override + protected void init() throws Exception { + loadIndex(Index.PEOPLE2); + loadIndex(Index.CALCS); + } + + @Test + public void position_function_test() { + String query = "SELECT firstname, position('a' IN firstname) FROM %s"; + JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_PEOPLE2)); + + verifySchema(response, schema("firstname", null, "keyword"), + schema("position('a' IN firstname)", null, "integer")); + assertEquals(12, response.getInt("total")); + + verifyDataRows(response, + rows("Daenerys", 2), rows("Hattie", 2), + rows("Nanette", 2), rows("Dale", 2), + rows("Elinor", 0), rows("Virginia", 8), + rows("Dillard", 5), rows("Mcgee", 0), + rows("Aurelia", 7), rows("Fulton", 0), + rows("Burton", 0), rows("Josie", 0)); + } + + @Test + public void position_function_with_nulls_test() { + String query = "SELECT str2, position('ee' IN str2) FROM %s"; + JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS)); + + verifySchema(response, schema("str2", null, "keyword"), + schema("position('ee' IN str2)", null, "integer")); + assertEquals(17, response.getInt("total")); + + verifyDataRows(response, + rows("one", 0), rows("two", 0), + rows("three", 4), rows(null, null), + rows("five", 0), rows("six", 0), + rows(null, null), rows("eight", 0), + rows("nine", 0), rows("ten", 0), + rows("eleven", 0), rows("twelve", 0), + rows(null, null), rows("fourteen", 6), + rows("fifteen", 5), rows("sixteen", 5), + rows(null, null)); + } + + @Test + public void position_function_with_string_literals_test() { + String query = "SELECT position('world' IN 'hello world')"; + JSONObject response = executeJdbcRequest(query); + + verifySchema(response, schema("position('world' IN 'hello world')", null, "integer")); + assertEquals(1, response.getInt("total")); + + verifyDataRows(response, rows(7)); + } + + @Test + public void position_function_with_only_fields_as_args_test() { + String query = "SELECT position(str3 IN str2) FROM %s WHERE str2 IN ('one', 'two', 'three')"; + JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS)); + + verifySchema(response, schema("position(str3 IN str2)", null, "integer")); + assertEquals(3, response.getInt("total")); + + verifyDataRows(response, rows(3), rows(0), rows(4)); + } + + @Test + public void position_function_with_function_as_arg_test() { + String query = "SELECT position(upper(str3) IN str1) FROM %s WHERE str1 LIKE 'BINDING SUPPLIES'"; + JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS)); + + verifySchema(response, schema("position(upper(str3) IN str1)", null, "integer")); + assertEquals(1, response.getInt("total")); + + verifyDataRows(response, rows(15)); + } + + @Test + public void position_function_in_where_clause_test() { + String query = "SELECT str2 FROM %s WHERE position(str3 IN str2)=1"; + JSONObject response = executeJdbcRequest(String.format(query, TestsConstants.TEST_INDEX_CALCS)); + + verifySchema(response, schema("str2", null, "keyword")); + assertEquals(2, response.getInt("total")); + + verifyDataRows(response, rows("eight"), rows("eleven")); + } + + @Test + public void position_function_with_null_args_test() { + String query1 = "SELECT str2, position(null IN str2) FROM %s WHERE str2 IN ('one')"; + String query2 = "SELECT str2, position(str2 IN null) FROM %s WHERE str2 IN ('one')"; + JSONObject response1 = executeJdbcRequest(String.format(query1, TestsConstants.TEST_INDEX_CALCS)); + JSONObject response2 = executeJdbcRequest(String.format(query2, TestsConstants.TEST_INDEX_CALCS)); + + verifySchema(response1, + schema("str2", null, "keyword"), + schema("position(null IN str2)", null, "integer")); + assertEquals(1, response1.getInt("total")); + + verifySchema(response2, + schema("str2", null, "keyword"), + schema("position(str2 IN null)", null, "integer")); + assertEquals(1, response2.getInt("total")); + + verifyDataRows(response1, rows("one", null)); + verifyDataRows(response2, rows("one", null)); + } +} diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index d11877488f..c416c78432 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -234,6 +234,7 @@ NULLIF: 'NULLIF'; PERIOD_ADD: 'PERIOD_ADD'; PERIOD_DIFF: 'PERIOD_DIFF'; PI: 'PI'; +POSITION: 'POSITION'; POW: 'POW'; POWER: 'POWER'; RADIANS: 'RADIANS'; diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 9f24594c9d..e6ae551fa2 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -316,6 +316,7 @@ functionCall | aggregateFunction (orderByClause)? filterClause #filteredAggregationFunctionCall | relevanceFunction #relevanceFunctionCall | highlightFunction #highlightFunctionCall + | positionFunction #positionFunctionCall ; @@ -323,6 +324,10 @@ highlightFunction : HIGHLIGHT LR_BRACKET relevanceField (COMMA highlightArg)* RR_BRACKET ; +positionFunction + : POSITION LR_BRACKET functionArg IN functionArg RR_BRACKET + ; + scalarFunctionName : mathematicalFunctionName | dateTimeFunctionName diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 3c59a5f8f2..bae22595ca 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -12,6 +12,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT_LIKE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.POSITION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.BinaryComparisonPredicateContext; import static org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.BooleanContext; @@ -148,6 +149,15 @@ public UnresolvedExpression visitHighlightFunctionCall( builder.build()); } + @Override + public UnresolvedExpression visitPositionFunction( + OpenSearchSQLParser.PositionFunctionContext ctx) { + return new Function( + POSITION.getName().getFunctionName(), + Arrays.asList(visitFunctionArg(ctx.functionArg(0)), + visitFunctionArg(ctx.functionArg(1)))); + } + @Override public UnresolvedExpression visitTableFilter(TableFilterContext ctx) { return new Function( diff --git a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java index 85c4135b45..ac68d146b2 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/parser/AstExpressionBuilderTest.java @@ -328,6 +328,14 @@ public void canBuildQualifiedNameHighlightFunction() { ); } + @Test + public void canBuildStringLiteralPositionFunction() { + assertEquals( + function("position", stringLiteral("substr"), stringLiteral("str")), + buildExprAst("position(\"substr\" IN \"str\")") + ); + } + @Test public void canBuildWindowFunctionWithoutOrderBy() { assertEquals(