From bfbb55557d1d64d6e94e488f10693c4519112a69 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Fri, 13 Dec 2024 15:39:09 -0800 Subject: [PATCH] update syntax from `project` to `view` Signed-off-by: YANGDB --- docs/ppl-lang/PPL-Example-Commands.md | 10 +++---- docs/ppl-lang/README.md | 2 +- ...project-command.md => ppl-view-command.md} | 30 +++++++++---------- ...> FlintSparkPPLViewStatementITSuite.scala} | 30 +++++++++---------- .../src/main/antlr4/OpenSearchPPLLexer.g4 | 2 +- .../src/main/antlr4/OpenSearchPPLParser.g4 | 6 ++-- .../sql/ast/AbstractNodeVisitor.java | 8 ++--- ...ojectStatement.java => ViewStatement.java} | 8 ++--- .../sql/ast/tree/{Project.java => View.java} | 16 +++++----- .../sql/ppl/CatalystQueryPlanVisitor.java | 23 ++++++-------- .../opensearch/sql/ppl/parser/AstBuilder.java | 10 +++---- .../sql/ppl/parser/AstStatementBuilder.java | 9 +++--- .../{ProjectionUtils.java => ViewUtils.java} | 8 ++--- ...lPlanViewQueriesTranslatorTestSuite.scala} | 26 ++++++++-------- 14 files changed, 91 insertions(+), 97 deletions(-) rename docs/ppl-lang/{ppl-project-command.md => ppl-view-command.md} (61%) rename integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/{FlintSparkPPLProjectStatementITSuite.scala => FlintSparkPPLViewStatementITSuite.scala} (95%) rename ppl-spark-integration/src/main/java/org/opensearch/sql/ast/statement/{ProjectStatement.java => ViewStatement.java} (84%) rename ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/{Project.java => View.java} (76%) rename ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/{ProjectionUtils.java => ViewUtils.java} (92%) rename ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/{PPLLogicalPlanProjectQueriesTranslatorTestSuite.scala => PPLLogicalPlanViewQueriesTranslatorTestSuite.scala} (90%) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index 7f1d1051c..13f382afb 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ 
b/docs/ppl-lang/PPL-Example-Commands.md @@ -276,18 +276,18 @@ source = table | where ispresent(a) | - `source=accounts | parse email '.+@(?.+)' | where age > 45 | sort - age | fields age, email, host` - `source=accounts | parse address '(?\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street` -#### **Project** -[See additional command details](ppl-project-command.md) +#### **view** +[See additional command details](ppl-view-command.md) ```sql -project newTableName using csv | source = table | where fieldA > value | stats count(fieldA) by fieldB +view newTableName using csv | source = table | where fieldA > value | stats count(fieldA) by fieldB -project ageDistribByCountry using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') partitioned by (age, country) | +view ageDistribByCountry using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') partitioned by (age, country) | source = table | stats avg(age) as avg_city_age by country, state, city | eval new_avg_city_age = avg_city_age - 1 | stats avg(new_avg_city_age) as avg_state_age by country, state | where avg_state_age > 18 | stats avg(avg_state_age) as avg_adult_country_age by country -project ageDistribByCountry using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') partitioned by (age, country) location 's://demo-app/my-bucket'| +view ageDistribByCountry using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') partitioned by (age, country) location 's://demo-app/my-bucket'| source = table | stats avg(age) as avg_city_age by country, state, city | eval new_avg_city_age = avg_city_age - 1 | stats avg(new_avg_city_age) as avg_state_age by country, state | where avg_state_age > 18 | stats avg(avg_state_age) as avg_adult_country_age by country diff --git a/docs/ppl-lang/README.md 
b/docs/ppl-lang/README.md index 1ed2863cb..852f924b8 100644 --- a/docs/ppl-lang/README.md +++ b/docs/ppl-lang/README.md @@ -42,7 +42,7 @@ For additional examples see the next [documentation](PPL-Example-Commands.md). - [`parse command`](ppl-parse-command.md) - - - [`project command`](ppl-project-command.md) + - [`view command`](ppl-view-command.md) - [`patterns command`](ppl-patterns-command.md) diff --git a/docs/ppl-lang/ppl-project-command.md b/docs/ppl-lang/ppl-view-command.md similarity index 61% rename from docs/ppl-lang/ppl-project-command.md rename to docs/ppl-lang/ppl-view-command.md index d1ddfd89d..2b32a1479 100644 --- a/docs/ppl-lang/ppl-project-command.md +++ b/docs/ppl-lang/ppl-view-command.md @@ -1,14 +1,14 @@ -## PPL `project` command +## PPL `view` command ### Description -Using `project` command to materialize a query into a dedicated view: -In some cases it is required to construct a projection view (materialized into a view) of the query results. -This projection can be later used as a source of continued queries for further slicing and dicing the data, in addition such tables can be also saved into a MV table that are pushed into OpenSearch and can be used for visualization and enhanced performant queries. +Using `view` command to materialize a query into a dedicated view: +In some cases it is required to construct a view (materialized into a view) of the query results. +This view can be later used as a source of continued queries for further slicing and dicing the data, in addition such tables can be also saved into a MV table that are pushed into OpenSearch and can be used for visualization and enhanced performant queries. 
-The command can also function as an ETL process where the original datasource will be transformed and ingested into the output projected view using the ppl transformation and aggregation operators +The command can also function as an ETL process where the original datasource will be transformed and ingested into the output view using the ppl transformation and aggregation operators **### Syntax -`PROJECT (IF NOT EXISTS)? viewName (USING datasource)? (OPTIONS optionsList)? (PARTITIONED BY partitionColumnNames)? location?` +`VIEW (IF NOT EXISTS)? viewName (USING datasource)? (OPTIONS optionsList)? (PARTITIONED BY partitionColumnNames)? location?` - **viewName** Specifies a view name, which may be optionally qualified with a database name. @@ -29,30 +29,30 @@ Specifies the physical location where the view or table data is stored. This cou The outcome view (viewName) is populated using the data from the select statement. ### Usage Guidelines -The project command produces a view based on the resulting rows returned from the query. +The view command produces a view based on the resulting rows returned from the query. Any query can be used in the `AS ` statement and attention must be used to the volume and compute that may incur due to such queries. -As a precautions an `explain cost | source = table | ... ` can be run prior to the `project` statement to have a better estimation. +As a precaution, an `explain cost | source = table | ... ` can be run prior to the `view` statement to have a better estimation. 
### Examples: ```sql -project newTableName using csv | source = table | where fieldA > value | stats count(fieldA) by fieldB +view newTableName using csv | source = table | where fieldA > value | stats count(fieldA) by fieldB -project ipRanges using parquet | source = table | where isV6 = true | eval inRange = case(cidrmatch(ipAddress, '2003:db8::/32'), 'in' else 'out') | fields ip, inRange +view ipRanges using parquet | source = table | where isV6 = true | eval inRange = case(cidrmatch(ipAddress, '2003:db8::/32'), 'in' else 'out') | fields ip, inRange -project avgBridgesByCountry using json | source = table | fields country, bridges | flatten bridges | fields country, length | stats avg(length) as avg by country +view avgBridgesByCountry using json | source = table | fields country, bridges | flatten bridges | fields country, length | stats avg(length) as avg by country -project ageDistribByCountry using parquet partitioned by (age, country) | +view ageDistribByCountry using parquet partitioned by (age, country) | source = table | stats avg(age) as avg_city_age by country, state, city | eval new_avg_city_age = avg_city_age - 1 | stats avg(new_avg_city_age) as avg_state_age by country, state | where avg_state_age > 18 | stats avg(avg_state_age) as avg_adult_country_age by country -project ageDistribByCountry using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') partitioned by (age, country) | +view ageDistribByCountry using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') partitioned by (age, country) | source = table | stats avg(age) as avg_city_age by country, state, city | eval new_avg_city_age = avg_city_age - 1 | stats avg(new_avg_city_age) as avg_state_age by country, state | where avg_state_age > 18 | stats avg(avg_state_age) as avg_adult_country_age by country -project ageDistribByCountry using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 
'parquet.bloom.filter.enabled#age'='false') partitioned by (age, country) location 's://demo-app/my-bucket'| +view ageDistribByCountry using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') partitioned by (age, country) location 's://demo-app/my-bucket'| source = table | stats avg(age) as avg_city_age by country, state, city | eval new_avg_city_age = avg_city_age - 1 | stats avg(new_avg_city_age) as avg_state_age by country, state | where avg_state_age > 18 | stats avg(avg_state_age) as avg_adult_country_age by country @@ -60,7 +60,7 @@ project ageDistribByCountry using parquet OPTIONS('parquet.bloom.filter.enabled' ``` ### Effective SQL push-down query -The project command is translated into an equivalent SQL `create table [Using ] As ` as shown here: +The view command is translated into an equivalent SQL `create table [Using ] As ` as shown here: ```sql CREATE TABLE [ IF NOT EXISTS ] table_identifier diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLProjectStatementITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLViewStatementITSuite.scala similarity index 95% rename from integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLProjectStatementITSuite.scala rename to integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLViewStatementITSuite.scala index b639faf7a..6ba424b79 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLProjectStatementITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLViewStatementITSuite.scala @@ -18,7 +18,7 @@ import org.apache.spark.sql.execution.ExplainMode import org.apache.spark.sql.execution.command.{DescribeTableCommand, ExplainCommand} import org.apache.spark.sql.streaming.StreamTest -class FlintSparkPPLProjectStatementITSuite +class FlintSparkPPLViewStatementITSuite extends 
QueryTest with LogicalPlanTestUtils with FlintPPLSuite @@ -78,7 +78,7 @@ class FlintSparkPPLProjectStatementITSuite } } - ignore("project sql test using csv") { + ignore("view sql test using csv") { val viewLocation = viewFolderLocation.toAbsolutePath.toString val frame = sql(s""" | CREATE TABLE student_partition_bucket @@ -107,9 +107,9 @@ class FlintSparkPPLProjectStatementITSuite comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) } - test("project using csv") { + test("view using csv") { val frame = sql(s""" - | project $viewName using csv | source = $testTable | where state != 'California' | fields name + | view $viewName using csv | source = $testTable | where state != 'California' | fields name | """.stripMargin) // Retrieve the logical plan @@ -168,9 +168,9 @@ class FlintSparkPPLProjectStatementITSuite "Partitioning does not contain ay FieldReferences") } - test("project using csv partition by age") { + test("view using csv partition by age") { val frame = sql(s""" - | project $viewName using csv partitioned by (age) | source = $testTable | where state != 'California' | fields name, age + | view $viewName using csv partitioned by (age) | source = $testTable | where state != 'California' | fields name, age | """.stripMargin) // Retrieve the logical plan @@ -237,9 +237,9 @@ class FlintSparkPPLProjectStatementITSuite "Partitioning does not contain a FieldReferences: 'age'") } - test("project using csv partition by state and country") { + test("view using csv partition by state and country") { val frame = sql(s""" - |project $viewName using csv partitioned by (state, country) | source = $testTable | dedup name | fields name, state, country + |view $viewName using csv partitioned by (state, country) | source = $testTable | dedup name | fields name, state, country | """.stripMargin) frame.collect() @@ -347,9 +347,9 @@ class FlintSparkPPLProjectStatementITSuite "Partitioning does not contain a FieldReferences: 'name'") } - test("project using parquet 
partition by state & country") { + test("view using parquet partition by state & country") { val frame = sql(s""" - |project $viewName using parquet partitioned by (state, country) | source = $testTable | dedup name | fields name, state, country + |view $viewName using parquet partitioned by (state, country) | source = $testTable | dedup name | fields name, state, country | """.stripMargin) frame.collect() @@ -457,9 +457,9 @@ class FlintSparkPPLProjectStatementITSuite "Partitioning does not contain a FieldReferences: 'name'") } - test("project using parquet with options & partition by state & country") { + test("view using parquet with options & partition by state & country") { val frame = sql(s""" - | project $viewName using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') + | view $viewName using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') | partitioned by (state, country) | source = $testTable | dedup name | fields name, state, country | """.stripMargin) @@ -571,10 +571,10 @@ class FlintSparkPPLProjectStatementITSuite "Partitioning does not contain a FieldReferences: 'name'") } - test("project using parquet with options & location with partition by state & country") { + test("view using parquet with options & location with partition by state & country") { val viewLocation = viewFolderLocation.toAbsolutePath.toString val frame = sql(s""" - | project $viewName using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') + | view $viewName using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') | partitioned by (state, country) location '$viewLocation' | source = $testTable | dedup name | fields name, state, country | """.stripMargin) @@ -688,7 +688,7 @@ class FlintSparkPPLProjectStatementITSuite test("test inner join with relation subquery") { val viewLocation = 
viewFolderLocation.toAbsolutePath.toString val frame = sql(s""" - | project $viewName using parquet OPTIONS('parquet.bloom.filter.enabled'='true') + | view $viewName using parquet OPTIONS('parquet.bloom.filter.enabled'='true') | partitioned by (age_span) location '$viewLocation' | | source = $testTable1 | | where country = 'USA' OR country = 'England' diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index 1933fa5d2..1aa6568d4 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -28,7 +28,7 @@ TOP: 'TOP'; RARE_APPROX: 'RARE_APPROX'; RARE: 'RARE'; PARSE: 'PARSE'; -PROJECT: 'PROJECT'; +VIEW: 'VIEW'; METHOD: 'METHOD'; REGEX: 'REGEX'; PUNCT: 'PUNCT'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index 12a503b05..bf6f45dbd 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -17,7 +17,7 @@ pplStatement ; dmlStatement - : (explainCommand PIPE | projectCommand PIPE)? queryStatement + : (explainCommand PIPE | viewCommand PIPE)? queryStatement ; queryStatement @@ -194,8 +194,8 @@ grokCommand : GROK (source_field = expression) (pattern = stringLiteral) ; -projectCommand - : PROJECT (IF NOT EXISTS)? tableQualifiedName (USING datasourceValues)? (OPTIONS options=tablePropertyList)? (PARTITIONED BY partitionColumnNames=identifierSeq)? locationSpec? +viewCommand + : VIEW (IF NOT EXISTS)? tableQualifiedName (USING datasourceValues)? (OPTIONS options=tablePropertyList)? (PARTITIONED BY partitionColumnNames=identifierSeq)? locationSpec? 
; locationSpec diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index b71c0eb7f..36789a9bf 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -20,7 +20,7 @@ import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.FieldList; import org.opensearch.sql.ast.expression.LambdaFunction; -import org.opensearch.sql.ast.statement.ProjectStatement; +import org.opensearch.sql.ast.statement.ViewStatement; import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.expression.FieldsMapping; import org.opensearch.sql.ast.expression.Function; @@ -56,7 +56,7 @@ import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Parse; -import org.opensearch.sql.ast.tree.Project; +import org.opensearch.sql.ast.tree.View; import org.opensearch.sql.ast.tree.RareTopN; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; @@ -138,11 +138,11 @@ public T visitSubqueryAlias(SubqueryAlias node, C context) { return visitChildren(node, context); } - public T visitProject(Project node, C context) { + public T visitView(View node, C context) { return visitChildren(node, context); } - public T visitProjectStatement(ProjectStatement node, C context) { + public T visitProjectStatement(ViewStatement node, C context) { return visitChildren(node, context); } diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/statement/ProjectStatement.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/statement/ViewStatement.java similarity index 84% rename from ppl-spark-integration/src/main/java/org/opensearch/sql/ast/statement/ProjectStatement.java rename to 
ppl-spark-integration/src/main/java/org/opensearch/sql/ast/statement/ViewStatement.java index 967eafbbe..af8729da8 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/statement/ProjectStatement.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/statement/ViewStatement.java @@ -25,7 +25,7 @@ */ @Data @EqualsAndHashCode(callSuper = false) -public class ProjectStatement extends Statement implements TableIdentifier { +public class ViewStatement extends Statement implements TableIdentifier { private final Statement statement; private final boolean override; @@ -36,9 +36,9 @@ public class ProjectStatement extends Statement implements TableIdentifier { private final Optional partitionColumns; private final Optional location; - public ProjectStatement(List tableNames, Optional using, - Optional options, Optional partitionColumns, - Optional location, Query statement, boolean override) { + public ViewStatement(List tableNames, Optional using, + Optional options, Optional partitionColumns, + Optional location, Query statement, boolean override) { this.tableNames = tableNames; this.using = using.map(p->DataSourceType.valueOf(p.toString())); this.options = options; diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Project.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/View.java similarity index 76% rename from ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Project.java rename to ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/View.java index 47e83ca5f..e6da38274 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Project.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/View.java @@ -21,18 +21,18 @@ @ToString @Getter @EqualsAndHashCode(callSuper = false) -public class Project extends UnresolvedPlan { - @Setter private List projectList; +public class View extends UnresolvedPlan { + @Setter private List 
viewList; private List argExprList; private UnresolvedPlan child; - public Project(List projectList) { - this.projectList = projectList; + public View(List viewList) { + this.viewList = viewList; this.argExprList = Collections.emptyList(); } - public Project(List projectList, List argExprList) { - this.projectList = projectList; + public View(List viewList, List argExprList) { + this.viewList = viewList; this.argExprList = argExprList; } @@ -50,7 +50,7 @@ public boolean isExcluded() { } @Override - public Project attach(UnresolvedPlan child) { + public View attach(UnresolvedPlan child) { this.child = child; return this; } @@ -63,6 +63,6 @@ public List getChild() { @Override public T accept(AbstractNodeVisitor nodeVisitor, C context) { - return nodeVisitor.visitProject(this, context); + return nodeVisitor.visitView(this, context); } } diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java index 1c99f657d..b4a438081 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java @@ -7,7 +7,6 @@ import org.apache.spark.sql.catalyst.TableIdentifier; import org.apache.spark.sql.catalyst.analysis.UnresolvedFunction; -import org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier; import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation; import org.apache.spark.sql.catalyst.analysis.UnresolvedStar$; import org.apache.spark.sql.catalyst.expressions.Ascending$; @@ -19,7 +18,6 @@ import org.apache.spark.sql.catalyst.expressions.SortDirection; import org.apache.spark.sql.catalyst.expressions.SortOrder; import org.apache.spark.sql.catalyst.plans.logical.Aggregate; -import org.apache.spark.sql.catalyst.plans.logical.CreateTableAsSelect; import 
org.apache.spark.sql.catalyst.plans.logical.DataFrameDropColumns$; import org.apache.spark.sql.catalyst.plans.logical.DescribeRelation$; import org.apache.spark.sql.catalyst.plans.logical.Generate; @@ -27,9 +25,6 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$; import org.apache.spark.sql.catalyst.plans.logical.Project$; -import org.apache.spark.sql.catalyst.plans.logical.UnresolvedTableSpec; -import org.apache.spark.sql.connector.expressions.FieldReference; -import org.apache.spark.sql.connector.expressions.IdentityTransform; import org.apache.spark.sql.execution.ExplainMode; import org.apache.spark.sql.execution.command.DescribeTableCommand; import org.apache.spark.sql.execution.command.ExplainCommand; @@ -49,7 +44,7 @@ import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.WindowFunction; import org.opensearch.sql.ast.statement.Explain; -import org.opensearch.sql.ast.statement.ProjectStatement; +import org.opensearch.sql.ast.statement.ViewStatement; import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; import org.opensearch.sql.ast.tree.Aggregation; @@ -67,7 +62,7 @@ import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Parse; -import org.opensearch.sql.ast.tree.Project; +import org.opensearch.sql.ast.tree.View; import org.opensearch.sql.ast.tree.RareAggregation; import org.opensearch.sql.ast.tree.RareTopN; import org.opensearch.sql.ast.tree.Relation; @@ -79,7 +74,7 @@ import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.ppl.utils.FieldSummaryTransformer; import org.opensearch.sql.ppl.utils.ParseTransformer; -import org.opensearch.sql.ppl.utils.ProjectionUtils; +import org.opensearch.sql.ppl.utils.ViewUtils; import org.opensearch.sql.ppl.utils.SortUtils; import 
org.opensearch.sql.ppl.utils.TrendlineCatalystUtils; import org.opensearch.sql.ppl.utils.WindowSpecTransformer; @@ -138,9 +133,9 @@ public LogicalPlan visitFirstChild(Node node, CatalystPlanContext context) { } @Override - public LogicalPlan visitProjectStatement(ProjectStatement node, CatalystPlanContext context) { + public LogicalPlan visitProjectStatement(ViewStatement node, CatalystPlanContext context) { node.getStatement().accept(this, context); - return context.apply(p -> ProjectionUtils.visitProject(p, node, context)); + return context.apply(p -> ViewUtils.visitView(p, node, context)); } @Override @@ -385,11 +380,11 @@ public LogicalPlan visitAlias(Alias node, CatalystPlanContext context) { } @Override - public LogicalPlan visitProject(Project node, CatalystPlanContext context) { + public LogicalPlan visitView(View node, CatalystPlanContext context) { //update plan's context prior to visiting node children if (node.isExcluded()) { List intersect = context.getProjectedFields().stream() - .filter(node.getProjectList()::contains) + .filter(node.getViewList()::contains) .collect(Collectors.toList()); if (!intersect.isEmpty()) { // Fields in parent projection, but they have be excluded in child. 
For example, @@ -397,10 +392,10 @@ public LogicalPlan visitProject(Project node, CatalystPlanContext context) { throw new SyntaxCheckException(intersect + " can't be resolved"); } } else { - context.withProjectedFields(node.getProjectList()); + context.withProjectedFields(node.getViewList()); } LogicalPlan child = visitFirstChild(node, context); - visitExpressionList(node.getProjectList(), context); + visitExpressionList(node.getViewList(), context); // Create a projection list from the existing expressions Seq projectList = seq(context.getNamedParseExpressions()); diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 7b6b7c863..fd9240622 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -22,7 +22,7 @@ import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.EqualTo; import org.opensearch.sql.ast.expression.Field; -import org.opensearch.sql.ast.statement.ProjectStatement; +import org.opensearch.sql.ast.statement.ViewStatement; import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; import org.opensearch.sql.ast.tree.FieldSummary; @@ -49,7 +49,7 @@ import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Parse; -import org.opensearch.sql.ast.tree.Project; +import org.opensearch.sql.ast.tree.View; import org.opensearch.sql.ast.tree.RareAggregation; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; @@ -236,7 +236,7 @@ private Join.JoinType getJoinType(OpenSearchPPLParser.JoinTypeContext ctx) { /** Fields command. 
*/ @Override public UnresolvedPlan visitFieldsCommand(OpenSearchPPLParser.FieldsCommandContext ctx) { - return new Project( + return new View( ctx.fieldList().fieldExpression().stream() .map(this::internalVisitExpression) .collect(Collectors.toList()), @@ -632,14 +632,14 @@ private String getTextInQuery(ParserRuleContext ctx) { return query.substring(start.getStartIndex(), stop.getStopIndex() + 1); } - public Statement buildProjectStatement(Query query, OpenSearchPPLParser.ProjectCommandContext ctx) { + public Statement buildProjectStatement(Query query, OpenSearchPPLParser.ViewCommandContext ctx) { List list = Collections.singletonList(internalVisitExpression(ctx.tableQualifiedName())); boolean override = (ctx.IF()==null); Optional using = (ctx.USING()!=null) ? Optional.of(expressionBuilder.visit(ctx.datasourceValues())) : Optional.empty(); Optional options = (ctx.options!=null) ? Optional.of( expressionBuilder.visit(ctx.options)) : Optional.empty(); Optional partitionColumns = (ctx.partitionColumnNames!=null) ? Optional.of(internalVisitExpression(ctx.partitionColumnNames)) : Optional.empty(); Optional location = (ctx.locationSpec()!=null) ? 
Optional.of(expressionBuilder.visit(ctx.locationSpec())) : Optional.empty(); - return new ProjectStatement(list, using, options, partitionColumns, location, query, override); + return new ViewStatement(list, using, options, partitionColumns, location, query, override); } } diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java index 032cdf38a..3f8ed60ad 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstStatementBuilder.java @@ -13,11 +13,10 @@ import org.opensearch.flint.spark.ppl.OpenSearchPPLParserBaseVisitor; import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.statement.Explain; -import org.opensearch.sql.ast.statement.ProjectStatement; import org.opensearch.sql.ast.statement.Query; import org.opensearch.sql.ast.statement.Statement; import org.opensearch.sql.ast.tree.DescribeRelation; -import org.opensearch.sql.ast.tree.Project; +import org.opensearch.sql.ast.tree.View; import org.opensearch.sql.ast.tree.UnresolvedPlan; /** Build {@link Statement} from PPL Query. 
*/ @@ -41,7 +40,7 @@ public Statement visitDmlStatement(OpenSearchPPLParser.DmlStatementContext ctx) if (explainContext != null) { return new Explain(query, explainContext.explainMode().getText()); } - OpenSearchPPLParser.ProjectCommandContext projectContext = ctx.projectCommand(); + OpenSearchPPLParser.ViewCommandContext projectContext = ctx.viewCommand(); if (projectContext != null) { return astBuilder.buildProjectStatement(query, projectContext); } @@ -84,12 +83,12 @@ public Object build() { } private UnresolvedPlan addSelectAll(UnresolvedPlan plan) { - if ((plan instanceof Project) && !((Project) plan).isExcluded()) { + if ((plan instanceof View) && !((View) plan).isExcluded()) { return plan; } else if (plan instanceof DescribeRelation) { return plan; } else { - return new Project(ImmutableList.of(AllFields.of())).attach(plan); + return new View(ImmutableList.of(AllFields.of())).attach(plan); } } } diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ProjectionUtils.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ViewUtils.java similarity index 92% rename from ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ProjectionUtils.java rename to ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ViewUtils.java index 0f17b5a6e..47dfaee6e 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ProjectionUtils.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/ViewUtils.java @@ -21,7 +21,7 @@ import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.AttributeList; import org.opensearch.sql.ast.expression.UnresolvedExpression; -import org.opensearch.sql.ast.statement.ProjectStatement; +import org.opensearch.sql.ast.statement.ViewStatement; import org.opensearch.sql.ppl.CatalystPlanContext; import scala.Option; import scala.Tuple2; @@ -36,17 +36,17 @@ import static 
org.opensearch.sql.ppl.utils.DataTypeTransformer.option; import static org.opensearch.sql.ppl.utils.DataTypeTransformer.seq; -public interface ProjectionUtils { +public interface ViewUtils { /** - * build a CreateTableAsSelect operator base on the ProjectStatement node + * build a CreateTableAsSelect operator based on the ViewStatement node * * 'CreateTableAsSelect [identity(age)], unresolvedtablespec(Some(parquet), optionlist(), None, None, None, false), false, false * :- 'UnresolvedIdentifier [student_partition_bucket], false * - 'Project [*] * - 'UnresolvedRelation [spark_catalog, default, flint_ppl_test], [], false * */ - static CreateTableAsSelect visitProject(LogicalPlan plan, ProjectStatement node, CatalystPlanContext context) { + static CreateTableAsSelect visitView(LogicalPlan plan, ViewStatement node, CatalystPlanContext context) { Optional using = node.getUsing().map(Enum::name); Optional options = node.getOptions(); Optional partitionColumns = node.getPartitionColumns(); diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanProjectQueriesTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanViewQueriesTranslatorTestSuite.scala similarity index 90% rename from ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanProjectQueriesTranslatorTestSuite.scala rename to ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanViewQueriesTranslatorTestSuite.scala index c3fb99a8a..78e0ba4f5 100644 --- a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanProjectQueriesTranslatorTestSuite.scala +++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanViewQueriesTranslatorTestSuite.scala @@ -20,7 +20,7 @@ import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical._ import
org.apache.spark.sql.connector.expressions.{Expression, FieldReference, IdentityTransform, NamedReference, Transform} -class PPLLogicalPlanProjectQueriesTranslatorTestSuite +class PPLLogicalPlanViewQueriesTranslatorTestSuite extends SparkFunSuite with PlanTest with LogicalPlanTestUtils @@ -33,13 +33,13 @@ class PPLLogicalPlanProjectQueriesTranslatorTestSuite private val pplParser = new PPLSyntaxParser() private val viewFolderLocation = Paths.get(".", "spark-warehouse", "student_partition_bucket") - test("test project a simple search with only one table using csv ") { + test("test view a simple search with only one table using csv ") { // if successful build ppl logical plan and translate to catalyst logical plan val context = new CatalystPlanContext val logPlan = planTransformer.visit( plan( pplParser, - "project simpleView using csv | source = table | where state != 'California' | fields name"), + "view simpleView using csv | source = table | where state != 'California' | fields name"), context) // Define the expected logical plan @@ -81,13 +81,13 @@ class PPLLogicalPlanProjectQueriesTranslatorTestSuite "Partitioning does not contain ay FieldReferences") } - test("test project a simple search with only one table using csv and partitioned field ") { + test("test view a simple search with only one table using csv and partitioned field ") { // if successful build ppl logical plan and translate to catalyst logical plan val context = new CatalystPlanContext val logPlan = planTransformer.visit( plan( pplParser, - "project if not exists simpleView using csv partitioned by (age) | source = table | where state != 'California' "), + "view if not exists simpleView using csv partitioned by (age) | source = table | where state != 'California' "), context) // Define the expected logical plan @@ -138,13 +138,13 @@ class PPLLogicalPlanProjectQueriesTranslatorTestSuite } test( - "test project a simple search with only one table using csv and multiple partitioned fields ") { + 
"test view a simple search with only one table using csv and multiple partitioned fields ") { // if successful build ppl logical plan and translate to catalyst logical plan val context = new CatalystPlanContext val logPlan = planTransformer.visit( plan( pplParser, - "project if not exists simpleView using csv partitioned by (age, country) | source = table | where state != 'California' "), + "view if not exists simpleView using csv partitioned by (age, country) | source = table | where state != 'California' "), context) // Define the expected logical plan @@ -196,13 +196,13 @@ class PPLLogicalPlanProjectQueriesTranslatorTestSuite } test( - "test project a simple search with only one table using parquet and Options with multiple partitioned fields ") { + "test view a simple search with only one table using parquet and Options with multiple partitioned fields ") { // if successful build ppl logical plan and translate to catalyst logical plan val context = new CatalystPlanContext val logPlan = planTransformer.visit( plan( pplParser, - "project if not exists simpleView using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') " + + "view if not exists simpleView using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') " + " partitioned by (age, country) | source = table | where state != 'California' "), context) @@ -258,14 +258,14 @@ class PPLLogicalPlanProjectQueriesTranslatorTestSuite } test( - "test project a simple search with only one table using parquet with location and Options with multiple partitioned fields ") { + "test view a simple search with only one table using parquet with location and Options with multiple partitioned fields ") { val viewLocation = viewFolderLocation.toAbsolutePath.toString val context = new CatalystPlanContext val logPlan = planTransformer.visit( plan( pplParser, s""" - | project if not exists simpleView using parquet 
OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') + | view if not exists simpleView using parquet OPTIONS('parquet.bloom.filter.enabled'='true', 'parquet.bloom.filter.enabled#age'='false') | partitioned by (age, country) location '$viewLocation' | source = table | where state != 'California' """.stripMargin), context) @@ -322,14 +322,14 @@ class PPLLogicalPlanProjectQueriesTranslatorTestSuite } test( - "test project with inner join: join condition with table names and predicates using parquet with location and Options with single partitioned fields") { + "test view with inner join: join condition with table names and predicates using parquet with location and Options with single partitioned fields") { val viewLocation = viewFolderLocation.toAbsolutePath.toString val context = new CatalystPlanContext val logPlan = planTransformer.visit( plan( pplParser, s""" - | project if not exists simpleView using parquet OPTIONS('parquet.bloom.filter.enabled'='true') + | view if not exists simpleView using parquet OPTIONS('parquet.bloom.filter.enabled'='true') | partitioned by (name) location '$viewLocation' | | source = $testTable1| INNER JOIN left = l right = r ON $testTable1.id = $testTable2.id AND $testTable1.count > 10 AND lower($testTable2.name) = 'hello' $testTable2 | """.stripMargin),