Restrict the scope of cancel API (#2548) (#2556)
* Restrict the scope of the cancel API

* Fix UT; batch query is only used for REFRESH

* Update style

* Support cancelling the refresh query

* Fix UT

* Refactor code

* Update doc

* Refactor code

---------

(cherry picked from commit a84c3ef)

Signed-off-by: Peng Huo <[email protected]>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 89c8234 commit 1f11fbe
Showing 12 changed files with 328 additions and 19 deletions.
3 changes: 3 additions & 0 deletions docs/user/interfaces/asyncqueryinterface.rst
@@ -185,6 +185,9 @@ Async Query Cancellation API
======================================
If the security plugin is enabled, this API can only be invoked by users with the permission ``cluster:admin/opensearch/ql/jobs/delete``.

Limitation: A Flint index creation statement with ``auto_refresh = true`` cannot be cancelled. To stop the auto-refresh query, submit an ALTER statement instead (for example, ``ALTER MATERIALIZED VIEW mv_name WITH (auto_refresh = false)``).
- Flint index creation statements include CREATE SKIPPING INDEX, CREATE INDEX, and CREATE MATERIALIZED VIEW.

HTTP URI: ``_plugins/_async_query/{queryId}``

HTTP VERB: ``DELETE``
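
For instance, a cancellation call from the OpenSearch low-level Java REST client might look like the sketch below (client setup is omitted, and ``queryId`` is whatever id the async query creation API returned)::

    import org.opensearch.client.Request;
    import org.opensearch.client.Response;
    import org.opensearch.client.RestClient;

    // restClient is an already-built low-level client; queryId came from the
    // async query creation API response.
    Request cancelRequest = new Request("DELETE", "/_plugins/_async_query/" + queryId);
    Response cancelResponse = restClient.performRequest(cancelRequest);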
AsyncQueryExecutorServiceImpl.java
@@ -56,7 +56,10 @@ public CreateAsyncQueryResponse createAsyncQuery(
sparkExecutionEngineConfig.getApplicationId(),
dispatchQueryResponse.getJobId(),
dispatchQueryResponse.getResultIndex(),
dispatchQueryResponse.getSessionId()));
dispatchQueryResponse.getSessionId(),
dispatchQueryResponse.getDatasourceName(),
dispatchQueryResponse.getJobType(),
dispatchQueryResponse.getIndexName()));
return new CreateAsyncQueryResponse(
dispatchQueryResponse.getQueryId().getId(), dispatchQueryResponse.getSessionId());
}
AsyncQueryJobMetadata.java
@@ -8,30 +8,44 @@
package org.opensearch.sql.spark.asyncquery.model;

import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken;
import static org.opensearch.sql.spark.execution.session.SessionModel.DATASOURCE_NAME;
import static org.opensearch.sql.spark.execution.statement.StatementModel.QUERY_ID;

import com.google.gson.Gson;
import java.io.IOException;
import java.util.Locale;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.SneakyThrows;
import org.opensearch.core.common.Strings;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.seqno.SequenceNumbers;
import org.opensearch.sql.spark.dispatcher.model.JobType;
import org.opensearch.sql.spark.execution.statestore.StateModel;

/** This class models all the metadata required for a job. */
@Data
@EqualsAndHashCode(callSuper = false)
public class AsyncQueryJobMetadata extends StateModel {
public static final String TYPE_JOBMETA = "jobmeta";
public static final String JOB_TYPE = "jobType";
public static final String INDEX_NAME = "indexName";

private final AsyncQueryId queryId;
private final String applicationId;
private final String jobId;
private final String resultIndex;
// optional sessionId.
private final String sessionId;
// since 2.13
// jobType could be null before OpenSearch 2.12. SparkQueryDispatcher uses jobType to choose the
// cancel query handler; if jobType is null, it invokes BatchQueryHandler.cancel().
private final JobType jobType;
// null if JobType is null
private final String datasourceName;
// null if JobType is INTERACTIVE or null
private final String indexName;

@EqualsAndHashCode.Exclude private final long seqNo;
@EqualsAndHashCode.Exclude private final long primaryTerm;
@@ -44,6 +58,9 @@ public AsyncQueryJobMetadata(
jobId,
resultIndex,
null,
null,
JobType.INTERACTIVE,
null,
SequenceNumbers.UNASSIGNED_SEQ_NO,
SequenceNumbers.UNASSIGNED_PRIMARY_TERM);
}
@@ -60,6 +77,31 @@ public AsyncQueryJobMetadata(
jobId,
resultIndex,
sessionId,
null,
JobType.INTERACTIVE,
null,
SequenceNumbers.UNASSIGNED_SEQ_NO,
SequenceNumbers.UNASSIGNED_PRIMARY_TERM);
}

public AsyncQueryJobMetadata(
AsyncQueryId queryId,
String applicationId,
String jobId,
String resultIndex,
String sessionId,
String datasourceName,
JobType jobType,
String indexName) {
this(
queryId,
applicationId,
jobId,
resultIndex,
sessionId,
datasourceName,
jobType,
indexName,
SequenceNumbers.UNASSIGNED_SEQ_NO,
SequenceNumbers.UNASSIGNED_PRIMARY_TERM);
}
@@ -70,13 +112,19 @@ public AsyncQueryJobMetadata(
String jobId,
String resultIndex,
String sessionId,
String datasourceName,
JobType jobType,
String indexName,
long seqNo,
long primaryTerm) {
this.queryId = queryId;
this.applicationId = applicationId;
this.jobId = jobId;
this.resultIndex = resultIndex;
this.sessionId = sessionId;
this.datasourceName = datasourceName;
this.jobType = jobType;
this.indexName = indexName;
this.seqNo = seqNo;
this.primaryTerm = primaryTerm;
}
@@ -102,6 +150,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
.field("applicationId", applicationId)
.field("resultIndex", resultIndex)
.field("sessionId", sessionId)
.field(DATASOURCE_NAME, datasourceName)
.field(JOB_TYPE, jobType.getText().toLowerCase(Locale.ROOT))
.field(INDEX_NAME, indexName)
.endObject();
return builder;
}
@@ -115,6 +166,9 @@ public static AsyncQueryJobMetadata copy(
copy.getJobId(),
copy.getResultIndex(),
copy.getSessionId(),
copy.datasourceName,
copy.jobType,
copy.indexName,
seqNo,
primaryTerm);
}
@@ -132,9 +186,11 @@ public static AsyncQueryJobMetadata fromXContent(
AsyncQueryId queryId = null;
String jobId = null;
String applicationId = null;
String resultIndex = null;
String sessionId = null;
String datasourceName = null;
String jobTypeStr = null;
String indexName = null;
ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
while (!XContentParser.Token.END_OBJECT.equals(parser.nextToken())) {
String fieldName = parser.currentName();
Expand All @@ -149,15 +205,18 @@ public static AsyncQueryJobMetadata fromXContent(
case "applicationId":
applicationId = parser.textOrNull();
break;
case "isDropIndexQuery":
isDropIndexQuery = parser.booleanValue();
break;
case "resultIndex":
resultIndex = parser.textOrNull();
break;
case "sessionId":
sessionId = parser.textOrNull();
break;
case DATASOURCE_NAME:
datasourceName = parser.textOrNull();
case JOB_TYPE:
jobTypeStr = parser.textOrNull();
case INDEX_NAME:
indexName = parser.textOrNull();
case "type":
break;
default:
@@ -168,7 +227,16 @@
throw new IllegalArgumentException("jobId and applicationId are required fields.");
}
return new AsyncQueryJobMetadata(
queryId, applicationId, jobId, resultIndex, sessionId, seqNo, primaryTerm);
queryId,
applicationId,
jobId,
resultIndex,
sessionId,
datasourceName,
Strings.isNullOrEmpty(jobTypeStr) ? null : JobType.fromString(jobTypeStr),
indexName,
seqNo,
primaryTerm);
}

@Override
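
To make the new persistence shape concrete, here is a hypothetical construction of job metadata for a streaming index creation; all values are illustrative, and toXContent above defines the exact field layout:

    import org.opensearch.sql.spark.asyncquery.model.AsyncQueryId;
    import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata;
    import org.opensearch.sql.spark.dispatcher.model.JobType;

    AsyncQueryJobMetadata metadata =
        new AsyncQueryJobMetadata(
            AsyncQueryId.newAsyncQueryId("my_glue"),  // hypothetical datasource name
            "application-id",
            "job-id",
            /* resultIndex */ null,
            /* sessionId */ null,
            "my_glue",
            JobType.STREAMING,
            "flint_my_glue_default_http_logs_skipping_index");  // hypothetical Flint index
    // In addition to the pre-2.13 fields, toXContent now persists the datasource
    // name, the job type ("streaming" here), and the Flint index name, which is
    // what lets SparkQueryDispatcher pick the right cancel handler later.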
RefreshQueryHandler.java
@@ -0,0 +1,67 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.spark.dispatcher;

import org.opensearch.sql.datasource.model.DataSourceMetadata;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata;
import org.opensearch.sql.spark.client.EMRServerlessClient;
import org.opensearch.sql.spark.dispatcher.model.DispatchQueryContext;
import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest;
import org.opensearch.sql.spark.dispatcher.model.DispatchQueryResponse;
import org.opensearch.sql.spark.dispatcher.model.JobType;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadata;
import org.opensearch.sql.spark.flint.FlintIndexMetadataReader;
import org.opensearch.sql.spark.flint.operation.FlintIndexOp;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpCancel;
import org.opensearch.sql.spark.leasemanager.LeaseManager;
import org.opensearch.sql.spark.response.JobExecutionResponseReader;

/** Handle Refresh Query. */
public class RefreshQueryHandler extends BatchQueryHandler {

private final FlintIndexMetadataReader flintIndexMetadataReader;
private final StateStore stateStore;
private final EMRServerlessClient emrServerlessClient;

public RefreshQueryHandler(
EMRServerlessClient emrServerlessClient,
JobExecutionResponseReader jobExecutionResponseReader,
FlintIndexMetadataReader flintIndexMetadataReader,
StateStore stateStore,
LeaseManager leaseManager) {
super(emrServerlessClient, jobExecutionResponseReader, leaseManager);
this.flintIndexMetadataReader = flintIndexMetadataReader;
this.stateStore = stateStore;
this.emrServerlessClient = emrServerlessClient;
}

@Override
public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) {
String datasourceName = asyncQueryJobMetadata.getDatasourceName();
FlintIndexMetadata indexMetadata =
flintIndexMetadataReader.getFlintIndexMetadata(asyncQueryJobMetadata.getIndexName());
FlintIndexOp jobCancelOp =
new FlintIndexOpCancel(stateStore, datasourceName, emrServerlessClient);
jobCancelOp.apply(indexMetadata);
return asyncQueryJobMetadata.getQueryId().getId();
}

@Override
public DispatchQueryResponse submit(
DispatchQueryRequest dispatchQueryRequest, DispatchQueryContext context) {
DispatchQueryResponse resp = super.submit(dispatchQueryRequest, context);
DataSourceMetadata dataSourceMetadata = context.getDataSourceMetadata();
return new DispatchQueryResponse(
resp.getQueryId(),
resp.getJobId(),
resp.getResultIndex(),
resp.getSessionId(),
dataSourceMetadata.getName(),
JobType.BATCH,
context.getIndexQueryDetails().openSearchIndexName());
}
}
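
As a usage sketch (the wiring and values are hypothetical, not part of this change): cancelling a manual refresh resolves the Flint index metadata by name and applies a FlintIndexOpCancel, which is expected to stop the underlying EMR Serverless job:

    // Hypothetical wiring; SparkQueryDispatcher builds the handler the same way below.
    RefreshQueryHandler handler =
        new RefreshQueryHandler(
            emrServerlessClient,
            jobExecutionResponseReader,
            flintIndexMetadataReader,
            stateStore,
            leaseManager);
    AsyncQueryJobMetadata refreshJob =
        new AsyncQueryJobMetadata(
            AsyncQueryId.newAsyncQueryId("my_glue"),  // hypothetical datasource
            "application-id",
            "job-id",
            /* resultIndex */ null,
            /* sessionId */ null,
            "my_glue",
            JobType.BATCH,  // manual REFRESH is recorded as a batch job
            "flint_my_glue_default_http_logs_skipping_index");
    String cancelledQueryId = handler.cancelJob(refreshJob);  // returns the query id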
SparkQueryDispatcher.java
@@ -22,6 +22,7 @@
import org.opensearch.sql.spark.dispatcher.model.DispatchQueryResponse;
import org.opensearch.sql.spark.dispatcher.model.IndexQueryActionType;
import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails;
import org.opensearch.sql.spark.dispatcher.model.JobType;
import org.opensearch.sql.spark.execution.session.SessionManager;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadataReader;
@@ -90,7 +91,12 @@ public DispatchQueryResponse dispatch(DispatchQueryRequest dispatchQueryRequest)
} else if (IndexQueryActionType.REFRESH.equals(indexQueryDetails.getIndexQueryActionType())) {
// manual refresh is handled by RefreshQueryHandler, which extends BatchQueryHandler
asyncQueryHandler =
new BatchQueryHandler(emrServerlessClient, jobExecutionResponseReader, leaseManager);
new RefreshQueryHandler(
emrServerlessClient,
jobExecutionResponseReader,
flintIndexMetadataReader,
stateStore,
leaseManager);
}
}
return asyncQueryHandler.submit(dispatchQueryRequest, contextBuilder.build());
@@ -117,6 +123,17 @@ public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) {
new InteractiveQueryHandler(sessionManager, jobExecutionResponseReader, leaseManager);
} else if (IndexDMLHandler.isIndexDMLQuery(asyncQueryJobMetadata.getJobId())) {
queryHandler = createIndexDMLHandler(emrServerlessClient);
} else if (asyncQueryJobMetadata.getJobType() == JobType.BATCH) {
queryHandler =
new RefreshQueryHandler(
emrServerlessClient,
jobExecutionResponseReader,
flintIndexMetadataReader,
stateStore,
leaseManager);
} else if (asyncQueryJobMetadata.getJobType() == JobType.STREAMING) {
queryHandler =
new StreamingQueryHandler(emrServerlessClient, jobExecutionResponseReader, leaseManager);
} else {
queryHandler =
new BatchQueryHandler(emrServerlessClient, jobExecutionResponseReader, leaseManager);
StreamingQueryHandler.java
@@ -13,6 +13,7 @@
import org.opensearch.sql.legacy.metrics.MetricName;
import org.opensearch.sql.legacy.utils.MetricUtils;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryId;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata;
import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters;
import org.opensearch.sql.spark.client.EMRServerlessClient;
import org.opensearch.sql.spark.client.StartJobRequest;
@@ -37,6 +38,13 @@ public StreamingQueryHandler(
this.emrServerlessClient = emrServerlessClient;
}

@Override
public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) {
throw new IllegalArgumentException(
"can't cancel streaming query; use an ALTER auto_refresh=off statement to stop the job, or a"
+ " VACUUM statement to stop the job and delete its data");
}

@Override
public DispatchQueryResponse submit(
DispatchQueryRequest dispatchQueryRequest, DispatchQueryContext context) {
@@ -77,6 +85,9 @@ public DispatchQueryResponse submit(
AsyncQueryId.newAsyncQueryId(dataSourceMetadata.getName()),
jobId,
dataSourceMetadata.getResultIndex(),
null,
dataSourceMetadata.getName(),
JobType.STREAMING,
indexQueryDetails.openSearchIndexName());
}
}
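
By contrast, cancelling a streaming (auto_refresh) job now fails fast. A minimal caller-side sketch, with hypothetical metadata values:

    StreamingQueryHandler streamingHandler =
        new StreamingQueryHandler(emrServerlessClient, jobExecutionResponseReader, leaseManager);
    AsyncQueryJobMetadata streamingJob =
        new AsyncQueryJobMetadata(
            AsyncQueryId.newAsyncQueryId("my_glue"),  // hypothetical datasource
            "application-id",
            "job-id",
            /* resultIndex */ null,
            /* sessionId */ null,
            "my_glue",
            JobType.STREAMING,
            "flint_my_glue_default_mv");  // hypothetical Flint index name
    try {
      streamingHandler.cancelJob(streamingJob);
    } catch (IllegalArgumentException e) {
      // Expected: stop streaming jobs with ALTER ... auto_refresh=off (or VACUUM).
    }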
DispatchQueryResponse.java
@@ -1,14 +1,37 @@
package org.opensearch.sql.spark.dispatcher.model;

import lombok.Getter;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryId;

@Getter
public class DispatchQueryResponse {
private final AsyncQueryId queryId;
private final String jobId;
private final String resultIndex;
private final String sessionId;
private final String datasourceName;
private final JobType jobType;
private final String indexName;

public DispatchQueryResponse(
AsyncQueryId queryId, String jobId, String resultIndex, String sessionId) {
this(queryId, jobId, resultIndex, sessionId, null, JobType.INTERACTIVE, null);
}

public DispatchQueryResponse(
AsyncQueryId queryId,
String jobId,
String resultIndex,
String sessionId,
String datasourceName,
JobType jobType,
String indexName) {
this.queryId = queryId;
this.jobId = jobId;
this.resultIndex = resultIndex;
this.sessionId = sessionId;
this.datasourceName = datasourceName;
this.jobType = jobType;
this.indexName = indexName;
}
}
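
A note on the design: the four-argument convenience constructor keeps existing interactive call sites compiling unchanged while defaulting the new routing fields, as this hypothetical comparison shows:

    // Interactive call sites are unchanged; jobType defaults to INTERACTIVE:
    DispatchQueryResponse interactive =
        new DispatchQueryResponse(queryId, "job-id", null, "session-id");

    // Refresh and streaming paths populate the routing fields explicitly:
    DispatchQueryResponse refresh =
        new DispatchQueryResponse(
            queryId,
            "job-id",
            /* resultIndex */ null,
            /* sessionId */ null,
            "my_glue",  // hypothetical datasource
            JobType.BATCH,
            "flint_my_glue_default_http_logs_skipping_index");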