[POC] Store state and error using QueryMetadataService #608
Changes from all commits
QueryState.scala
@@ -0,0 +1,13 @@
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.opensearch.flint.data

object QueryState {
  val WAITING = "waiting"
  val RUNNING = "running"
  val SUCCESS = "success"
  val FAILED = "failed"
}
FlintREPL.scala
@@ -89,12 +89,19 @@ object FlintREPL extends Logging with FlintJobExecutor {
logInfo(s"""streaming query ${query}""") | ||
configDYNMaxExecutors(conf, jobType) | ||
val streamingRunningCount = new AtomicInteger(0) | ||
|
||
val queryId = conf.get("spark.flint.job.queryId") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. unsure on FlintREPL having queryId here as well. Isn't queryId for interactive session per query? Gotten from flintCommand There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. queryId is already in FlintStatement / FlintCommand, considering reuse it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For streaming query, I think we cannot read from FlintStatement. Unless reading from env variable, we don't have any key to retrieve query info. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's go ahead with this approach for POC. |
||
if (queryId.isEmpty) { | ||
logWarning("Query ID was not specified.") | ||
} | ||
|
||
val jobOperator = | ||
JobOperator( | ||
createSparkSession(conf), | ||
query, | ||
dataSource, | ||
resultIndex, | ||
queryId, | ||
true, | ||
streamingRunningCount) | ||
registerGauge(MetricConstants.STREAMING_RUNNING_METRIC, streamingRunningCount) | ||
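Since the streaming path has no FlintStatement to read from, the query ID travels through the Spark configuration. A minimal, hypothetical submission-side sketch (the key name comes from the diff; the app name and ID value are placeholders):

```scala
import org.apache.spark.SparkConf

// Illustrative only: whatever launches the streaming job must set this key,
// since FlintREPL reads it via conf.get("spark.flint.job.queryId") and only
// logs a warning when the value is empty.
val sparkConf = new SparkConf()
  .setAppName("flint-streaming-job")             // placeholder app name
  .set("spark.flint.job.queryId", "query-00001") // placeholder query ID
```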
JobOperator.scala
@@ -14,17 +14,19 @@ import scala.util.{Failure, Success, Try}
 import org.opensearch.flint.core.metrics.MetricConstants
 import org.opensearch.flint.core.metrics.MetricsUtil.incrementCounter
+import org.opensearch.flint.data.QueryState

 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.flint.config.FlintSparkConf
-import org.apache.spark.sql.util.ShuffleCleaner
+import org.apache.spark.sql.util.{CustomClassLoader, ShuffleCleaner}
 import org.apache.spark.util.ThreadUtils

 case class JobOperator(
     spark: SparkSession,
     query: String,
     dataSource: String,
     resultIndex: String,
+    queryId: String,
     streaming: Boolean,
     streamingRunningCount: AtomicInteger)
     extends Logging
@@ -42,10 +44,14 @@ case class JobOperator(
     val startTime = System.currentTimeMillis()
     streamingRunningCount.incrementAndGet()

+    val flintSparkConf = FlintSparkConf()
+    val queryMetadataService = CustomClassLoader(flintSparkConf).getQueryMetadataService()
+    queryMetadataService.updateQueryState(queryId, QueryState.RUNNING, null)
     // osClient needs spark session to be created first to get FlintOptions initialized.
     // Otherwise, we will have connection exception from EMR-S to OS.
-    val osClient = new OSClient(FlintSparkConf().flintOptions())
+    val osClient = new OSClient(flintSparkConf.flintOptions())
     var exceptionThrown = true
+    var error: String = null
     try {
       val futureMappingCheck = Future {
         checkAndCreateIndex(osClient, resultIndex)
@@ -61,30 +67,31 @@ case class JobOperator(
       exceptionThrown = false
     } catch {
       case e: TimeoutException =>
-        val error = s"Getting the mapping of index $resultIndex timed out"
+        error = s"Getting the mapping of index $resultIndex timed out"
         logError(error, e)
         dataToWrite = Some(
           getFailedData(spark, dataSource, error, "", query, "", startTime, currentTimeProvider))
       case e: Exception =>
-        val error = processQueryException(e)
+        error = processQueryException(e)
         dataToWrite = Some(
           getFailedData(spark, dataSource, error, "", query, "", startTime, currentTimeProvider))
     } finally {
-      cleanUpResources(exceptionThrown, threadPool, dataToWrite, resultIndex, osClient)
+      try {
+        dataToWrite.foreach(df => writeDataFrameToOpensearch(df, resultIndex, osClient))
+      } catch {
+        case e: Exception =>
+          exceptionThrown = true
+          error = s"Failed to write to result index. originalError='${error}'"
+          logError(error, e)
+      }
+      val queryState = if (exceptionThrown) QueryState.FAILED else QueryState.SUCCESS
+      queryMetadataService.updateQueryState(queryId, queryState, error);

Review thread:
- Can reuse https://github.com/opensearch-project/opensearch-spark/blob/poc/repl-metadata-refactor/spark-sql-application/src/main/scala/org/apache/spark/sql/CommandLifecycleManager.scala#L17 instead of a new queryMetadataService.
- CommandLifecycleManager seems to require reading FlintCommand using other methods, but as we don't have a session, we cannot use it. And CommandLifecycleManager is designed for the command lifecycle in a session, which does not seem to fit this use case.
- Let's go ahead with this approach for the POC.

+      cleanUpResources(exceptionThrown, threadPool)
     }
   }

-  def cleanUpResources(
-      exceptionThrown: Boolean,
-      threadPool: ThreadPoolExecutor,
-      dataToWrite: Option[DataFrame],
-      resultIndex: String,
-      osClient: OSClient): Unit = {
-    try {
-      dataToWrite.foreach(df => writeDataFrameToOpensearch(df, resultIndex, osClient))
-    } catch {
-      case e: Exception => logError("fail to write to result index", e)
-    }
+  def cleanUpResources(exceptionThrown: Boolean, threadPool: ThreadPoolExecutor): Unit = {
+
     try {
       // Wait for streaming job complete if no error and there is streaming job running
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.apache.spark.sql

import org.apache.spark.internal.Logging
import org.apache.spark.sql.flint.config.FlintSparkConf

/**
 * Temporary default implementation for QueryMetadataService. This should be replaced with an
 * implementation which writes status to an OpenSearch index.
 */
class NoOpQueryMetadataService(flintSparkConf: FlintSparkConf)
    extends QueryMetadataService
    with Logging {

  override def updateQueryState(queryId: String, state: String, error: String): Unit =
    logInfo(s"updateQueryState: queryId=${queryId}, state=`${state}`, error=`${error}`")
}
QueryMetadataService.scala
@@ -0,0 +1,11 @@
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.apache.spark.sql

/** Interface for updating query state and error. */
trait QueryMetadataService {
  def updateQueryState(queryId: String, state: String, error: String): Unit
}
CustomClassLoader.scala
@@ -0,0 +1,30 @@
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.apache.spark.sql.util

import org.apache.spark.sql.QueryMetadataService
import org.apache.spark.sql.flint.config.FlintSparkConf
import org.apache.spark.util.Utils

case class CustomClassLoader(flintSparkConf: FlintSparkConf) {

  def getQueryMetadataService(): QueryMetadataService = {
    instantiateClass[QueryMetadataService](
      flintSparkConf.flintOptions().getCustomQueryMetadataService)
  }

  private def instantiateClass[T](className: String): T = {
    try {
      val providerClass = Utils.classForName(className)
      val ctor = providerClass.getDeclaredConstructor(classOf[FlintSparkConf])
      ctor.setAccessible(true)
      ctor.newInstance(flintSparkConf).asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw new RuntimeException(s"Failed to instantiate provider: $className", e)
    }
  }
}
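The reflective lookup above pins the constructor signature: any custom service must expose exactly one constructor taking a FlintSparkConf. A hypothetical replacement for NoOpQueryMetadataService, purely to illustrate that contract (the class and its in-memory store are not part of this PR):

```scala
package org.apache.spark.sql

import scala.collection.concurrent.TrieMap

import org.apache.spark.internal.Logging
import org.apache.spark.sql.flint.config.FlintSparkConf

// Hypothetical example: records the latest (state, error) per query in memory.
// CustomClassLoader can instantiate it because it exposes the required
// single-argument FlintSparkConf constructor.
class InMemoryQueryMetadataService(flintSparkConf: FlintSparkConf)
    extends QueryMetadataService
    with Logging {

  private val states = TrieMap.empty[String, (String, String)]

  override def updateQueryState(queryId: String, state: String, error: String): Unit = {
    states.put(queryId, (state, error))
    logInfo(s"query $queryId moved to state $state")
  }
}
```

Pointing the custom-service option at this class's fully qualified name would make CustomClassLoader load it instead of the default; the exact option key lives behind FlintOptions.getCustomQueryMetadataService and is not shown in this diff.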
Review thread:
- Can we reuse https://github.com/opensearch-project/opensearch-spark/blob/poc/repl-metadata-refactor/flint-data/src/main/scala/org/opensearch/flint/data/FlintCommand.scala#L28-L54?
- That cannot be referred to from outside. I think we want to migrate to these constants (or with some other name). (I wanted an enum, but it looked like Scala 2 does not have a simple way to implement one.)
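On the enum point: a common Scala 2 workaround is a sealed trait or abstract class with case objects, which gives compile-time exhaustiveness checking while still carrying the same string values the POC persists. A sketch of what QueryState could look like in that style (an alternative, not what this PR implements):

```scala
package org.opensearch.flint.data

// Hypothetical alternative to the string-constant QueryState object:
// sealed hierarchy means pattern matches over states are checked for
// exhaustiveness, while `value` keeps the persisted string form.
sealed abstract class QueryState(val value: String)

object QueryState {
  case object Waiting extends QueryState("waiting")
  case object Running extends QueryState("running")
  case object Success extends QueryState("success")
  case object Failed extends QueryState("failed")

  // Parse a persisted string back into a state, if it matches one.
  def fromString(s: String): Option[QueryState] =
    Seq(Waiting, Running, Success, Failed).find(_.value == s)
}
```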