Skip to content

Commit

Permalink
Handle ALTER Index Queries in SQL Plugin (#2554)
Browse files Browse the repository at this point in the history
Signed-off-by: Vamsi Manohar <[email protected]>
  • Loading branch information
vmmusings authored Mar 18, 2024
1 parent a84c3ef commit d11a268
Show file tree
Hide file tree
Showing 44 changed files with 2,546 additions and 760 deletions.
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ allprojects {
configurations.all {
resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib:1.9.10"
resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib-common:1.9.10"
resolutionStrategy.force "net.bytebuddy:byte-buddy:1.14.9"
}
}

Expand Down
27 changes: 26 additions & 1 deletion spark/src/main/antlr/FlintSparkSqlExtensions.g4
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ skippingIndexStatement
: createSkippingIndexStatement
| refreshSkippingIndexStatement
| describeSkippingIndexStatement
| alterSkippingIndexStatement
| dropSkippingIndexStatement
| vacuumSkippingIndexStatement
| analyzeSkippingIndexStatement
;

createSkippingIndexStatement
Expand All @@ -46,6 +48,12 @@ describeSkippingIndexStatement
: (DESC | DESCRIBE) SKIPPING INDEX ON tableName
;

alterSkippingIndexStatement
: ALTER SKIPPING INDEX
ON tableName
WITH LEFT_PAREN propertyList RIGHT_PAREN
;

dropSkippingIndexStatement
: DROP SKIPPING INDEX ON tableName
;
Expand All @@ -59,6 +67,7 @@ coveringIndexStatement
| refreshCoveringIndexStatement
| showCoveringIndexStatement
| describeCoveringIndexStatement
| alterCoveringIndexStatement
| dropCoveringIndexStatement
| vacuumCoveringIndexStatement
;
Expand All @@ -83,6 +92,12 @@ describeCoveringIndexStatement
: (DESC | DESCRIBE) INDEX indexName ON tableName
;

alterCoveringIndexStatement
: ALTER INDEX indexName
ON tableName
WITH LEFT_PAREN propertyList RIGHT_PAREN
;

dropCoveringIndexStatement
: DROP INDEX indexName ON tableName
;
Expand All @@ -91,11 +106,16 @@ vacuumCoveringIndexStatement
: VACUUM INDEX indexName ON tableName
;

analyzeSkippingIndexStatement
: ANALYZE SKIPPING INDEX ON tableName
;

materializedViewStatement
: createMaterializedViewStatement
| refreshMaterializedViewStatement
| showMaterializedViewStatement
| describeMaterializedViewStatement
| alterMaterializedViewStatement
| dropMaterializedViewStatement
| vacuumMaterializedViewStatement
;
Expand All @@ -118,6 +138,11 @@ describeMaterializedViewStatement
: (DESC | DESCRIBE) MATERIALIZED VIEW mvName=multipartIdentifier
;

alterMaterializedViewStatement
: ALTER MATERIALIZED VIEW mvName=multipartIdentifier
WITH LEFT_PAREN propertyList RIGHT_PAREN
;

dropMaterializedViewStatement
: DROP MATERIALIZED VIEW mvName=multipartIdentifier
;
Expand Down Expand Up @@ -163,7 +188,7 @@ indexColTypeList
;

indexColType
: identifier skipType=(PARTITION | VALUE_SET | MIN_MAX)
: identifier skipType=(PARTITION | VALUE_SET | MIN_MAX | BLOOM_FILTER)
(LEFT_PAREN skipParams RIGHT_PAREN)?
;

Expand Down
3 changes: 3 additions & 0 deletions spark/src/main/antlr/SparkSqlBase.g4
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ nonReserved

// Flint lexical tokens

BLOOM_FILTER: 'BLOOM_FILTER';
MIN_MAX: 'MIN_MAX';
SKIPPING: 'SKIPPING';
VALUE_SET: 'VALUE_SET';
Expand All @@ -155,6 +156,8 @@ DOT: '.';


AS: 'AS';
ALTER: 'ALTER';
ANALYZE: 'ANALYZE';
CREATE: 'CREATE';
DESC: 'DESC';
DESCRIBE: 'DESCRIBE';
Expand Down
14 changes: 12 additions & 2 deletions spark/src/main/antlr/SqlBaseLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ COMMA: ',';
DOT: '.';
LEFT_BRACKET: '[';
RIGHT_BRACKET: ']';
BANG: '!';

// NOTE: If you add a new token in the list below, you should update the list of keywords
// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`, and
Expand Down Expand Up @@ -273,7 +274,7 @@ NANOSECOND: 'NANOSECOND';
NANOSECONDS: 'NANOSECONDS';
NATURAL: 'NATURAL';
NO: 'NO';
NOT: 'NOT' | '!';
NOT: 'NOT';
NULL: 'NULL';
NULLS: 'NULLS';
NUMERIC: 'NUMERIC';
Expand Down Expand Up @@ -510,8 +511,13 @@ BIGDECIMAL_LITERAL
| DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
;

// Generalize the identifier to give a sensible INVALID_IDENTIFIER error message:
// * Unicode letters rather than a-z and A-Z only
// * URI paths for table references using paths
// We then narrow down to ANSI rules in exitUnquotedIdentifier() in the parser.
IDENTIFIER
: (LETTER | DIGIT | '_')+
: (UNICODE_LETTER | DIGIT | '_')+
| UNICODE_LETTER+ '://' (UNICODE_LETTER | DIGIT | '_' | '/' | '-' | '.' | '?' | '=' | '&' | '#' | '%')+
;

BACKQUOTED_IDENTIFIER
Expand All @@ -535,6 +541,10 @@ fragment LETTER
: [A-Z]
;

fragment UNICODE_LETTER
: [\p{L}]
;

SIMPLE_COMMENT
: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
;
Expand Down
3 changes: 2 additions & 1 deletion spark/src/main/antlr/SqlBaseParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ describeFuncName
| comparisonOperator
| arithmeticOperator
| predicateOperator
| BANG
;

describeColName
Expand Down Expand Up @@ -946,7 +947,7 @@ expressionSeq
;

booleanExpression
: NOT booleanExpression #logicalNot
: (NOT | BANG) booleanExpression #logicalNot
| EXISTS LEFT_PAREN query RIGHT_PAREN #exists
| valueExpression predicate? #predicated
| left=booleanExpression operator=AND right=booleanExpression #logicalBinary
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
import static org.opensearch.sql.spark.execution.statestore.StateStore.createIndexDMLResult;

import com.amazonaws.services.emrserverless.model.JobRunState;
import java.util.Map;
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONObject;
import org.opensearch.client.Client;
import org.opensearch.sql.datasource.model.DataSourceMetadata;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryId;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata;
Expand All @@ -27,58 +28,71 @@
import org.opensearch.sql.spark.execution.statement.StatementState;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadata;
import org.opensearch.sql.spark.flint.FlintIndexMetadataReader;
import org.opensearch.sql.spark.flint.FlintIndexMetadataService;
import org.opensearch.sql.spark.flint.operation.FlintIndexOp;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpCancel;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpDelete;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpAlter;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpDrop;
import org.opensearch.sql.spark.response.JobExecutionResponseReader;

/** Handles Index DML queries, including DROP and ALTER index operations. */
@RequiredArgsConstructor
public class IndexDMLHandler extends AsyncQueryHandler {
private static final Logger LOG = LogManager.getLogger();

// To be deprecated in 3.0. Still using for backward compatibility.
public static final String DROP_INDEX_JOB_ID = "dropIndexJobId";
public static final String DML_QUERY_JOB_ID = "DMLQueryJobId";

private final EMRServerlessClient emrServerlessClient;

private final JobExecutionResponseReader jobExecutionResponseReader;

private final FlintIndexMetadataReader flintIndexMetadataReader;

private final Client client;
private final FlintIndexMetadataService flintIndexMetadataService;

private final StateStore stateStore;

public static boolean isIndexDMLQuery(String jobId) {
return DROP_INDEX_JOB_ID.equalsIgnoreCase(jobId);
return DROP_INDEX_JOB_ID.equalsIgnoreCase(jobId) || DML_QUERY_JOB_ID.equalsIgnoreCase(jobId);
}

@Override
public DispatchQueryResponse submit(
DispatchQueryRequest dispatchQueryRequest, DispatchQueryContext context) {
DataSourceMetadata dataSourceMetadata = context.getDataSourceMetadata();
IndexQueryDetails indexDetails = context.getIndexQueryDetails();
FlintIndexMetadata indexMetadata = flintIndexMetadataReader.getFlintIndexMetadata(indexDetails);
// if index is created without auto refresh. there is no job to cancel.
String status = JobRunState.FAILED.toString();
String error = "";
long startTime = 0L;
long startTime = System.currentTimeMillis();
try {
FlintIndexOp jobCancelOp =
new FlintIndexOpCancel(
stateStore, dispatchQueryRequest.getDatasource(), emrServerlessClient);
jobCancelOp.apply(indexMetadata);

FlintIndexOp indexDeleteOp =
new FlintIndexOpDelete(stateStore, dispatchQueryRequest.getDatasource());
indexDeleteOp.apply(indexMetadata);
status = JobRunState.SUCCESS.toString();
IndexQueryDetails indexDetails = context.getIndexQueryDetails();
FlintIndexMetadata indexMetadata = getFlintIndexMetadata(indexDetails);
executeIndexOp(dispatchQueryRequest, indexDetails, indexMetadata);
AsyncQueryId asyncQueryId =
storeIndexDMLResult(
dispatchQueryRequest,
dataSourceMetadata,
JobRunState.SUCCESS.toString(),
StringUtils.EMPTY,
startTime);
return new DispatchQueryResponse(
asyncQueryId, DML_QUERY_JOB_ID, dataSourceMetadata.getResultIndex(), null);
} catch (Exception e) {
error = e.getMessage();
LOG.error(e);
LOG.error(e.getMessage());
AsyncQueryId asyncQueryId =
storeIndexDMLResult(
dispatchQueryRequest,
dataSourceMetadata,
JobRunState.FAILED.toString(),
e.getMessage(),
startTime);
return new DispatchQueryResponse(
asyncQueryId, DML_QUERY_JOB_ID, dataSourceMetadata.getResultIndex(), null);
}
}

private AsyncQueryId storeIndexDMLResult(
DispatchQueryRequest dispatchQueryRequest,
DataSourceMetadata dataSourceMetadata,
String status,
String error,
long startTime) {
AsyncQueryId asyncQueryId = AsyncQueryId.newAsyncQueryId(dataSourceMetadata.getName());
IndexDMLResult indexDMLResult =
new IndexDMLResult(
Expand All @@ -88,10 +102,48 @@ public DispatchQueryResponse submit(
dispatchQueryRequest.getDatasource(),
System.currentTimeMillis() - startTime,
System.currentTimeMillis());
String resultIndex = dataSourceMetadata.getResultIndex();
createIndexDMLResult(stateStore, resultIndex).apply(indexDMLResult);
createIndexDMLResult(stateStore, dataSourceMetadata.getResultIndex()).apply(indexDMLResult);
return asyncQueryId;
}

return new DispatchQueryResponse(asyncQueryId, DROP_INDEX_JOB_ID, resultIndex, null);
/**
 * Dispatches the index operation matching the query's action type.
 *
 * @param dispatchQueryRequest the incoming dispatch request (supplies the datasource name)
 * @param indexQueryDetails parsed details of the index query, including its action type
 * @param indexMetadata Flint metadata of the index the operation is applied to
 * @throws IllegalStateException if the action type is neither DROP nor ALTER
 */
private void executeIndexOp(
    DispatchQueryRequest dispatchQueryRequest,
    IndexQueryDetails indexQueryDetails,
    FlintIndexMetadata indexMetadata) {
  switch (indexQueryDetails.getIndexQueryActionType()) {
    case DROP:
      {
        // Drop cancels any running refresh job and removes the index.
        FlintIndexOp flintIndexOpDrop =
            new FlintIndexOpDrop(
                stateStore, dispatchQueryRequest.getDatasource(), emrServerlessClient);
        flintIndexOpDrop.apply(indexMetadata);
        break;
      }
    case ALTER:
      {
        // Alter rewrites the index options via the metadata service.
        FlintIndexOpAlter alterOperation =
            new FlintIndexOpAlter(
                indexQueryDetails.getFlintIndexOptions(),
                stateStore,
                dispatchQueryRequest.getDatasource(),
                emrServerlessClient,
                flintIndexMetadataService);
        alterOperation.apply(indexMetadata);
        break;
      }
    default:
      // Only DML action types routed to this handler are supported here.
      throw new IllegalStateException(
          String.format(
              "IndexQueryActionType: %s is not supported in IndexDMLHandler.",
              indexQueryDetails.getIndexQueryActionType()));
  }
}

/**
 * Fetches the Flint index metadata for the index targeted by the query.
 *
 * @param indexDetails parsed index query details used to derive the OpenSearch index name
 * @return the {@link FlintIndexMetadata} for the resolved index name
 * @throws IllegalStateException if no metadata entry exists for the index
 */
private FlintIndexMetadata getFlintIndexMetadata(IndexQueryDetails indexDetails) {
  // Resolve the index name once instead of re-invoking openSearchIndexName() per use.
  String openSearchIndexName = indexDetails.openSearchIndexName();
  Map<String, FlintIndexMetadata> indexMetadataMap =
      flintIndexMetadataService.getFlintIndexMetadata(openSearchIndexName);
  if (!indexMetadataMap.containsKey(openSearchIndexName)) {
    throw new IllegalStateException(
        String.format("Couldn't fetch flint index: %s details", openSearchIndexName));
  }
  return indexMetadataMap.get(openSearchIndexName);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

package org.opensearch.sql.spark.dispatcher;

import java.util.Map;
import org.opensearch.sql.datasource.model.DataSourceMetadata;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata;
import org.opensearch.sql.spark.client.EMRServerlessClient;
Expand All @@ -14,7 +15,7 @@
import org.opensearch.sql.spark.dispatcher.model.JobType;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadata;
import org.opensearch.sql.spark.flint.FlintIndexMetadataReader;
import org.opensearch.sql.spark.flint.FlintIndexMetadataService;
import org.opensearch.sql.spark.flint.operation.FlintIndexOp;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpCancel;
import org.opensearch.sql.spark.leasemanager.LeaseManager;
Expand All @@ -23,27 +24,33 @@
/** Handle Refresh Query. */
public class RefreshQueryHandler extends BatchQueryHandler {

private final FlintIndexMetadataReader flintIndexMetadataReader;
private final FlintIndexMetadataService flintIndexMetadataService;
private final StateStore stateStore;
private final EMRServerlessClient emrServerlessClient;

public RefreshQueryHandler(
EMRServerlessClient emrServerlessClient,
JobExecutionResponseReader jobExecutionResponseReader,
FlintIndexMetadataReader flintIndexMetadataReader,
FlintIndexMetadataService flintIndexMetadataService,
StateStore stateStore,
LeaseManager leaseManager) {
super(emrServerlessClient, jobExecutionResponseReader, leaseManager);
this.flintIndexMetadataReader = flintIndexMetadataReader;
this.flintIndexMetadataService = flintIndexMetadataService;
this.stateStore = stateStore;
this.emrServerlessClient = emrServerlessClient;
}

@Override
public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) {
String datasourceName = asyncQueryJobMetadata.getDatasourceName();
FlintIndexMetadata indexMetadata =
flintIndexMetadataReader.getFlintIndexMetadata(asyncQueryJobMetadata.getIndexName());
Map<String, FlintIndexMetadata> indexMetadataMap =
flintIndexMetadataService.getFlintIndexMetadata(asyncQueryJobMetadata.getIndexName());
if (!indexMetadataMap.containsKey(asyncQueryJobMetadata.getIndexName())) {
throw new IllegalStateException(
String.format(
"Couldn't fetch flint index: %s details", asyncQueryJobMetadata.getIndexName()));
}
FlintIndexMetadata indexMetadata = indexMetadataMap.get(asyncQueryJobMetadata.getIndexName());
FlintIndexOp jobCancelOp =
new FlintIndexOpCancel(stateStore, datasourceName, emrServerlessClient);
jobCancelOp.apply(indexMetadata);
Expand Down
Loading

0 comments on commit d11a268

Please sign in to comment.