From 9d2f94df654cd981a7b9389d8c182f0af82f53a8 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Tue, 29 Oct 2024 13:38:35 +0800 Subject: [PATCH] Fix the issue that missing identifiers from ANTLR keywords (#821) Signed-off-by: Lantao Jin --- .../src/main/antlr4/OpenSearchPPLLexer.g4 | 4 - .../src/main/antlr4/OpenSearchPPLParser.g4 | 106 +++++++++--------- .../sql/ppl/parser/AstExpressionBuilder.java | 14 +-- 3 files changed, 55 insertions(+), 69 deletions(-) diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index fc92b0a14..3b5f605df 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -74,13 +74,9 @@ INDEX: 'INDEX'; D: 'D'; DESC: 'DESC'; DATASOURCES: 'DATASOURCES'; -VALUE: 'VALUE'; USING: 'USING'; WITH: 'WITH'; -// CLAUSE KEYWORDS -SORTBY: 'SORTBY'; - // FIELD KEYWORDS AUTO: 'AUTO'; STR: 'STR'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index 1b8acac1b..12aa1332d 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -55,6 +55,35 @@ commands | fieldsummaryCommand ; +commandName + : SEARCH + | DESCRIBE + | SHOW + | AD + | ML + | KMEANS + | WHERE + | CORRELATE + | JOIN + | FIELDS + | STATS + | EVENTSTATS + | DEDUP + | EXPLAIN + | SORT + | HEAD + | TOP + | RARE + | EVAL + | GROK + | PARSE + | PATTERNS + | LOOKUP + | RENAME + | FILLNULL + | FIELDSUMMARY + ; + searchCommand : (SEARCH)? fromClause # searchFrom | (SEARCH)? fromClause logicalExpression # searchFromFilter @@ -360,14 +389,6 @@ statsFunctionName | STDDEV_POP ; -takeAggFunction - : TAKE LT_PRTHS fieldExpression (COMMA size = integerLiteral)? RT_PRTHS - ; - -percentileAggFunction - : PERCENTILE LESS value = integerLiteral GREATER LT_PRTHS aggField = fieldExpression RT_PRTHS - ; - // expressions expression : logicalExpression @@ -999,46 +1020,37 @@ keywordsCanBeId | mathematicalFunctionName | positionFunctionName | cryptographicFunctionName - // commands - | SEARCH - | DESCRIBE - | SHOW - | FROM - | WHERE - | CORRELATE - | FIELDS - | RENAME - | STATS - | DEDUP - | SORT - | EVAL - | HEAD - | TOP - | RARE - | PARSE - | METHOD - | REGEX - | PUNCT - | GROK - | PATTERN - | PATTERNS - | NEW_FIELD - | KMEANS - | AD - | ML - | EXPLAIN + | singleFieldRelevanceFunctionName + | multiFieldRelevanceFunctionName + | commandName + | comparisonOperator + | explainMode + | correlationType // commands assist keywords | IN | SOURCE | INDEX | DESC | DATASOURCES - // CLAUSEKEYWORDS - | SORTBY - // FIELDKEYWORDSAUTO + | AUTO | STR | IP | NUM + | FROM + | PATTERN + | NEW_FIELD + | SCOPE + | MAPPING + | WITH + | USING + | CAST + | GET_FORMAT + | EXTRACT + | INTERVAL + | PLUS + | MINUS + | INCLUDEFIELDS + | NULLS // ARGUMENT KEYWORDS | KEEPEMPTY | CONSECUTIVE @@ -1061,27 +1073,21 @@ keywordsCanBeId | TRAINING_DATA_SIZE | ANOMALY_SCORE_THRESHOLD // AGGREGATIONS - | AVG - | COUNT + | statsFunctionName | DISTINCT_COUNT + | PERCENTILE + | PERCENTILE_APPROX | ESTDC | ESTDC_ERROR - | MAX | MEAN | MEDIAN - | MIN | MODE | RANGE | STDEV | STDEVP - | SUM | SUMSQ | VAR_SAMP | VAR_POP - | STDDEV_SAMP - | STDDEV_POP - | PERCENTILE - | PERCENTILE_APPROX | TAKE | FIRST | LAST @@ -1099,10 +1105,6 @@ keywordsCanBeId | SPARKLINE | C | DC - // FIELD SUMMARY - | FIELDSUMMARY - | INCLUDEFIELDS - | NULLS // JOIN TYPE | OUTER | INNER diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index fa03fc856..20c55a401 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -22,7 +22,6 @@ import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.EqualTo; import org.opensearch.sql.ast.expression.Field; -import org.opensearch.sql.ast.expression.FieldList; import org.opensearch.sql.ast.expression.Function; import org.opensearch.sql.ast.expression.In; import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; @@ -42,7 +41,6 @@ import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.When; import org.opensearch.sql.ast.expression.Xor; -import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.utils.ArgumentFactory; @@ -54,8 +52,6 @@ import java.util.stream.IntStream; import java.util.stream.Stream; -import static org.opensearch.flint.spark.ppl.OpenSearchPPLParser.INCLUDEFIELDS; -import static org.opensearch.flint.spark.ppl.OpenSearchPPLParser.NULLS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.EQUAL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; @@ -81,7 +77,7 @@ public void setAstBuilder(AstBuilder astBuilder) { /** * The function name mapping between fronted and core engine. */ - private static Map FUNCTION_NAME_MAPPING = + private static final Map FUNCTION_NAME_MAPPING = new ImmutableMap.Builder() .put("isnull", IS_NULL.getName().getFunctionName()) .put("isnotnull", IS_NOT_NULL.getName().getFunctionName()) @@ -217,14 +213,6 @@ public UnresolvedExpression visitDistinctCountFunctionCall(OpenSearchPPLParser.D return new AggregateFunction("count", visit(ctx.valueExpression()), true); } - @Override - public UnresolvedExpression visitPercentileAggFunction(OpenSearchPPLParser.PercentileAggFunctionContext ctx) { - return new AggregateFunction( - ctx.PERCENTILE().getText(), - visit(ctx.aggField), - Collections.singletonList(new Argument("rank", (Literal) visit(ctx.value)))); - } - @Override public UnresolvedExpression visitPercentileFunctionCall(OpenSearchPPLParser.PercentileFunctionCallContext ctx) { return new AggregateFunction(