Skip to content

Commit

Permalink
Splitting StateStore into multiple storage services
Browse files Browse the repository at this point in the history
Signed-off-by: Vamsi Manohar <[email protected]>
  • Loading branch information
vmmusings committed Apr 17, 2024
1 parent ff702ed commit a3774ae
Show file tree
Hide file tree
Showing 36 changed files with 607 additions and 287 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
import org.opensearch.sql.spark.cluster.ClusterManagerEventListener;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadataServiceImpl;
import org.opensearch.sql.spark.flint.FlintIndexStateModelService;
import org.opensearch.sql.spark.rest.RestAsyncQueryManagementAction;
import org.opensearch.sql.spark.storage.SparkStorageFactory;
import org.opensearch.sql.spark.transport.TransportCancelAsyncQueryRequestAction;
Expand Down Expand Up @@ -229,7 +230,8 @@ public Collection<Object> createComponents(
dataSourceService,
injector.getInstance(FlintIndexMetadataServiceImpl.class),
injector.getInstance(StateStore.class),
injector.getInstance(EMRServerlessClientFactory.class));
injector.getInstance(EMRServerlessClientFactory.class),
injector.getInstance(FlintIndexStateModelService.class));
return ImmutableList.of(
dataSourceService,
injector.getInstance(AsyncQueryExecutorService.class),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

package org.opensearch.sql.spark.asyncquery;

import static org.opensearch.sql.spark.execution.statestore.StateStore.createJobMetaData;
import static org.opensearch.sql.spark.execution.statestore.StateStore.DATASOURCE_TO_REQUEST_INDEX;

import java.util.Optional;
import lombok.RequiredArgsConstructor;
Expand All @@ -17,6 +17,7 @@
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryId;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.execution.xcontent.AsyncQueryJobMetadataXContentSerializer;

/** OpenSearch implementation of {@link AsyncQueryJobMetadataStorageService}. */
@RequiredArgsConstructor
Expand All @@ -31,15 +32,22 @@ public class OpensearchAsyncQueryJobMetadataStorageService
@Override
public void storeJobMetadata(AsyncQueryJobMetadata asyncQueryJobMetadata) {
AsyncQueryId queryId = asyncQueryJobMetadata.getQueryId();
createJobMetaData(stateStore, queryId.getDataSourceName()).apply(asyncQueryJobMetadata);
stateStore.create(
asyncQueryJobMetadata,
AsyncQueryJobMetadata::copy,
DATASOURCE_TO_REQUEST_INDEX.apply(queryId.getDataSourceName()));
}

@Override
public Optional<AsyncQueryJobMetadata> getJobMetadata(String qid) {
try {
AsyncQueryId queryId = new AsyncQueryId(qid);
return StateStore.getJobMetaData(stateStore, queryId.getDataSourceName())
.apply(queryId.docId());
AsyncQueryJobMetadataXContentSerializer asyncQueryJobMetadataXContentSerializer =
new AsyncQueryJobMetadataXContentSerializer();
return stateStore.get(
queryId.docId(),
asyncQueryJobMetadataXContentSerializer::fromXContent,
DATASOURCE_TO_REQUEST_INDEX.apply(queryId.getDataSourceName()));
} catch (Exception e) {
LOGGER.error("Error while fetching the job metadata.", e);
throw new AsyncQueryNotFoundException(String.format("Invalid QueryId: %s", qid));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.opensearch.sql.spark.client.EMRServerlessClientFactory;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadataService;
import org.opensearch.sql.spark.flint.FlintIndexStateModelService;
import org.opensearch.threadpool.Scheduler.Cancellable;
import org.opensearch.threadpool.ThreadPool;

Expand All @@ -39,6 +40,8 @@ public class ClusterManagerEventListener implements LocalNodeClusterManagerListe
private FlintIndexMetadataService flintIndexMetadataService;
private StateStore stateStore;
private EMRServerlessClientFactory emrServerlessClientFactory;

private FlintIndexStateModelService flintIndexStateModelService;
private Duration sessionTtlDuration;
private Duration resultTtlDuration;
private TimeValue streamingJobHouseKeepingInterval;
Expand All @@ -57,7 +60,8 @@ public ClusterManagerEventListener(
DataSourceService dataSourceService,
FlintIndexMetadataService flintIndexMetadataService,
StateStore stateStore,
EMRServerlessClientFactory emrServerlessClientFactory) {
EMRServerlessClientFactory emrServerlessClientFactory,
FlintIndexStateModelService flintIndexStateModelService) {
this.clusterService = clusterService;
this.threadPool = threadPool;
this.client = client;
Expand All @@ -70,7 +74,7 @@ public ClusterManagerEventListener(
this.sessionTtlDuration = toDuration(sessionTtl.get(settings));
this.resultTtlDuration = toDuration(resultTtl.get(settings));
this.streamingJobHouseKeepingInterval = streamingJobHouseKeepingInterval.get(settings);

this.flintIndexStateModelService = flintIndexStateModelService;
clusterService
.getClusterSettings()
.addSettingsUpdateConsumer(
Expand Down Expand Up @@ -153,7 +157,7 @@ private void initializeStreamingJobHouseKeeperCron() {
new FlintStreamingJobHouseKeeperTask(
dataSourceService,
flintIndexMetadataService,
stateStore,
flintIndexStateModelService,
emrServerlessClientFactory),
streamingJobHouseKeepingInterval,
executorName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
import org.opensearch.sql.legacy.metrics.Metrics;
import org.opensearch.sql.spark.client.EMRServerlessClientFactory;
import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadata;
import org.opensearch.sql.spark.flint.FlintIndexMetadataService;
import org.opensearch.sql.spark.flint.FlintIndexStateModelService;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpAlter;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpDrop;

Expand All @@ -31,7 +31,7 @@ public class FlintStreamingJobHouseKeeperTask implements Runnable {

private final DataSourceService dataSourceService;
private final FlintIndexMetadataService flintIndexMetadataService;
private final StateStore stateStore;
private final FlintIndexStateModelService flintIndexStateModelService;
private final EMRServerlessClientFactory emrServerlessClientFactory;

private static final Logger LOGGER = LogManager.getLogger(FlintStreamingJobHouseKeeperTask.class);
Expand Down Expand Up @@ -96,7 +96,8 @@ private void dropAutoRefreshIndex(
// When the datasource is deleted. Possibly Replace with VACUUM Operation.
LOGGER.info("Attempting to drop auto refresh index: {}", autoRefreshIndex);
FlintIndexOpDrop flintIndexOpDrop =
new FlintIndexOpDrop(stateStore, datasourceName, emrServerlessClientFactory.getClient());
new FlintIndexOpDrop(
flintIndexStateModelService, datasourceName, emrServerlessClientFactory.getClient());
flintIndexOpDrop.apply(flintIndexMetadata);
LOGGER.info("Successfully dropped index: {}", autoRefreshIndex);
}
Expand All @@ -109,7 +110,7 @@ private void alterAutoRefreshIndex(
FlintIndexOpAlter flintIndexOpAlter =
new FlintIndexOpAlter(
flintIndexOptions,
stateStore,
flintIndexStateModelService,
datasourceName,
emrServerlessClientFactory.getClient(),
flintIndexMetadataService);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import static org.opensearch.sql.spark.data.constants.SparkConstants.ERROR_FIELD;
import static org.opensearch.sql.spark.data.constants.SparkConstants.STATUS_FIELD;
import static org.opensearch.sql.spark.execution.statestore.StateStore.createIndexDMLResult;

import com.amazonaws.services.emrserverless.model.JobRunState;
import java.util.Map;
Expand All @@ -27,9 +26,10 @@
import org.opensearch.sql.spark.dispatcher.model.IndexDMLResult;
import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails;
import org.opensearch.sql.spark.execution.statement.StatementState;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadata;
import org.opensearch.sql.spark.flint.FlintIndexMetadataService;
import org.opensearch.sql.spark.flint.FlintIndexStateModelService;
import org.opensearch.sql.spark.flint.IndexDMLResultStorageService;
import org.opensearch.sql.spark.flint.operation.FlintIndexOp;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpAlter;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpDrop;
Expand All @@ -51,7 +51,8 @@ public class IndexDMLHandler extends AsyncQueryHandler {

private final FlintIndexMetadataService flintIndexMetadataService;

private final StateStore stateStore;
private final FlintIndexStateModelService flintIndexStateModelService;
private final IndexDMLResultStorageService indexDMLResultStorageService;

private final Client client;

Expand Down Expand Up @@ -106,7 +107,7 @@ private AsyncQueryId storeIndexDMLResult(
dispatchQueryRequest.getDatasource(),
System.currentTimeMillis() - startTime,
System.currentTimeMillis());
createIndexDMLResult(stateStore, dataSourceMetadata.getResultIndex()).apply(indexDMLResult);
indexDMLResultStorageService.createIndexDMLResult(indexDMLResult, dataSourceMetadata.getName());
return asyncQueryId;
}

Expand All @@ -118,22 +119,25 @@ private void executeIndexOp(
case DROP:
FlintIndexOp dropOp =
new FlintIndexOpDrop(
stateStore, dispatchQueryRequest.getDatasource(), emrServerlessClient);
flintIndexStateModelService,
dispatchQueryRequest.getDatasource(),
emrServerlessClient);
dropOp.apply(indexMetadata);
break;
case ALTER:
FlintIndexOpAlter flintIndexOpAlter =
new FlintIndexOpAlter(
indexQueryDetails.getFlintIndexOptions(),
stateStore,
flintIndexStateModelService,
dispatchQueryRequest.getDatasource(),
emrServerlessClient,
flintIndexMetadataService);
flintIndexOpAlter.apply(indexMetadata);
break;
case VACUUM:
FlintIndexOp indexVacuumOp =
new FlintIndexOpVacuum(stateStore, dispatchQueryRequest.getDatasource(), client);
new FlintIndexOpVacuum(
flintIndexStateModelService, dispatchQueryRequest.getDatasource(), client);
indexVacuumOp.apply(indexMetadata);
break;
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest;
import org.opensearch.sql.spark.dispatcher.model.DispatchQueryResponse;
import org.opensearch.sql.spark.dispatcher.model.JobType;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadata;
import org.opensearch.sql.spark.flint.FlintIndexMetadataService;
import org.opensearch.sql.spark.flint.FlintIndexStateModelService;
import org.opensearch.sql.spark.flint.operation.FlintIndexOp;
import org.opensearch.sql.spark.flint.operation.FlintIndexOpCancel;
import org.opensearch.sql.spark.leasemanager.LeaseManager;
Expand All @@ -25,18 +25,18 @@
public class RefreshQueryHandler extends BatchQueryHandler {

private final FlintIndexMetadataService flintIndexMetadataService;
private final StateStore stateStore;
private final FlintIndexStateModelService flintIndexStateModelService;
private final EMRServerlessClient emrServerlessClient;

public RefreshQueryHandler(
EMRServerlessClient emrServerlessClient,
JobExecutionResponseReader jobExecutionResponseReader,
FlintIndexMetadataService flintIndexMetadataService,
StateStore stateStore,
FlintIndexStateModelService flintIndexStateModelService,
LeaseManager leaseManager) {
super(emrServerlessClient, jobExecutionResponseReader, leaseManager);
this.flintIndexMetadataService = flintIndexMetadataService;
this.stateStore = stateStore;
this.flintIndexStateModelService = flintIndexStateModelService;
this.emrServerlessClient = emrServerlessClient;
}

Expand All @@ -52,7 +52,7 @@ public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) {
}
FlintIndexMetadata indexMetadata = indexMetadataMap.get(asyncQueryJobMetadata.getIndexName());
FlintIndexOp jobCancelOp =
new FlintIndexOpCancel(stateStore, datasourceName, emrServerlessClient);
new FlintIndexOpCancel(flintIndexStateModelService, datasourceName, emrServerlessClient);
jobCancelOp.apply(indexMetadata);
return asyncQueryJobMetadata.getQueryId().getId();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails;
import org.opensearch.sql.spark.dispatcher.model.JobType;
import org.opensearch.sql.spark.execution.session.SessionManager;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.flint.FlintIndexMetadataService;
import org.opensearch.sql.spark.flint.FlintIndexStateModelService;
import org.opensearch.sql.spark.flint.IndexDMLResultStorageService;
import org.opensearch.sql.spark.leasemanager.LeaseManager;
import org.opensearch.sql.spark.response.JobExecutionResponseReader;
import org.opensearch.sql.spark.rest.model.LangType;
Expand Down Expand Up @@ -53,7 +54,9 @@ public class SparkQueryDispatcher {

private LeaseManager leaseManager;

private StateStore stateStore;
private FlintIndexStateModelService flintIndexStateModelService;

private IndexDMLResultStorageService indexDMLResultStorageService;

public DispatchQueryResponse dispatch(DispatchQueryRequest dispatchQueryRequest) {
EMRServerlessClient emrServerlessClient = emrServerlessClientFactory.getClient();
Expand Down Expand Up @@ -91,7 +94,7 @@ public DispatchQueryResponse dispatch(DispatchQueryRequest dispatchQueryRequest)
emrServerlessClient,
jobExecutionResponseReader,
flintIndexMetadataService,
stateStore,
flintIndexStateModelService,
leaseManager);
}
}
Expand Down Expand Up @@ -145,7 +148,7 @@ public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) {
emrServerlessClient,
jobExecutionResponseReader,
flintIndexMetadataService,
stateStore,
flintIndexStateModelService,
leaseManager);
} else if (asyncQueryJobMetadata.getJobType() == JobType.STREAMING) {
queryHandler =
Expand All @@ -162,7 +165,8 @@ private IndexDMLHandler createIndexDMLHandler(EMRServerlessClient emrServerlessC
emrServerlessClient,
jobExecutionResponseReader,
flintIndexMetadataService,
stateStore,
flintIndexStateModelService,
indexDMLResultStorageService,
client);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
import static org.opensearch.sql.spark.execution.session.SessionState.END_STATE;
import static org.opensearch.sql.spark.execution.session.SessionState.FAIL;
import static org.opensearch.sql.spark.execution.statement.StatementId.newStatementId;
import static org.opensearch.sql.spark.execution.statestore.StateStore.createSession;
import static org.opensearch.sql.spark.execution.statestore.StateStore.getSession;

import java.util.Optional;
import lombok.Builder;
Expand All @@ -24,6 +22,8 @@
import org.opensearch.sql.spark.execution.statement.QueryRequest;
import org.opensearch.sql.spark.execution.statement.Statement;
import org.opensearch.sql.spark.execution.statement.StatementId;
import org.opensearch.sql.spark.execution.statement.StatementStorageService;
import org.opensearch.sql.spark.execution.statestore.SessionStorageService;
import org.opensearch.sql.spark.execution.statestore.StateStore;
import org.opensearch.sql.spark.rest.model.LangType;
import org.opensearch.sql.spark.utils.TimeProvider;
Expand All @@ -42,6 +42,8 @@ public class InteractiveSession implements Session {

private final SessionId sessionId;
private final StateStore stateStore;
private final StatementStorageService statementStorageService;
private final SessionStorageService sessionStorageService;
private final EMRServerlessClient serverlessClient;
private SessionModel sessionModel;
// the threshold of elapsed time in milliseconds before we say a session is stale
Expand All @@ -64,7 +66,7 @@ public void open(CreateSessionRequest createSessionRequest) {
sessionModel =
initInteractiveSession(
applicationId, jobID, sessionId, createSessionRequest.getDatasourceName());
createSession(stateStore, sessionModel.getDatasourceName()).apply(sessionModel);
sessionStorageService.createSession(sessionModel, sessionModel.getDatasourceName());
} catch (VersionConflictEngineException e) {
String errorMsg = "session already exist. " + sessionId;
LOG.error(errorMsg);
Expand All @@ -76,7 +78,7 @@ public void open(CreateSessionRequest createSessionRequest) {
@Override
public void close() {
Optional<SessionModel> model =
getSession(stateStore, sessionModel.getDatasourceName()).apply(sessionModel.getId());
sessionStorageService.getSession(sessionModel.getId(), sessionModel.getDatasourceName());
if (model.isEmpty()) {
throw new IllegalStateException("session does not exist. " + sessionModel.getSessionId());
} else {
Expand All @@ -88,7 +90,7 @@ public void close() {
/** Submits a statement. If the submission succeeds, the statement is in the waiting state. */
public StatementId submit(QueryRequest request) {
Optional<SessionModel> model =
getSession(stateStore, sessionModel.getDatasourceName()).apply(sessionModel.getId());
sessionStorageService.getSession(sessionModel.getId(), sessionModel.getDatasourceName());
if (model.isEmpty()) {
throw new IllegalStateException("session does not exist. " + sessionModel.getSessionId());
} else {
Expand All @@ -102,6 +104,7 @@ public StatementId submit(QueryRequest request) {
.applicationId(sessionModel.getApplicationId())
.jobId(sessionModel.getJobId())
.stateStore(stateStore)
.statementStorageService(statementStorageService)
.statementId(statementId)
.langType(LangType.SQL)
.datasourceName(sessionModel.getDatasourceName())
Expand All @@ -124,8 +127,8 @@ public StatementId submit(QueryRequest request) {

@Override
public Optional<Statement> get(StatementId stID) {
return StateStore.getStatement(stateStore, sessionModel.getDatasourceName())
.apply(stID.getId())
return statementStorageService
.getStatementModel(stID.getId(), sessionModel.getDatasourceName())
.map(
model ->
Statement.builder()
Expand All @@ -137,6 +140,7 @@ public Optional<Statement> get(StatementId stID) {
.query(model.getQuery())
.queryId(model.getQueryId())
.stateStore(stateStore)
.statementStorageService(statementStorageService)
.statementModel(model)
.build());
}
Expand Down
Loading

0 comments on commit a3774ae

Please sign in to comment.