Skip to content

Commit

Permalink
Refactor query input
Browse files: browse the repository at this point in the history
Signed-off-by: Louis Chu <[email protected]>
  • Loading branch information
noCharger committed Mar 10, 2024
1 parent 8e810e5 commit 35c0711
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ object FlintSparkConf {
FlintConfig(s"spark.flint.datasource.name")
.doc("data source name")
.createOptional()
// Optional Spark conf entry holding the query text for a Flint job.
// FlintJob (batch) and FlintREPL (streaming) look this key up in the Spark conf
// and fail with IllegalArgumentException when the value is empty.
val QUERY = FlintConfig("spark.flint.job.query")
  .doc("Flint query for batch and streaming job")
  .createOptional()
val JOB_TYPE =
FlintConfig(s"spark.flint.job.type")
.doc("Flint job type. Including interactive and streaming")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class FlintREPLITSuite extends SparkFunSuite with OpenSearchSuite with JobTest {
Map("SERVERLESS_EMR_JOB_ID" -> jobRunId, "SERVERLESS_EMR_VIRTUAL_CLUSTER_ID" -> appId))
FlintREPL.enableHiveSupport = false
FlintREPL.terminateJVM = false
FlintREPL.main(Array("select 1", resultIndex))
FlintREPL.main(Array(resultIndex))
}
futureResult.onComplete {
case Success(result) => logInfo(s"Success result: $result")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import org.opensearch.cluster.metadata.MappingMetadata
import org.opensearch.common.settings.Settings
import org.opensearch.common.xcontent.XContentType
import org.opensearch.flint.core.{FlintClient, FlintClientBuilder, FlintOptions}
import org.opensearch.flint.core.logging.CustomLogging
import org.opensearch.flint.core.metadata.FlintMetadata
import org.opensearch.flint.core.metrics.MetricConstants
import org.opensearch.flint.core.metrics.MetricsUtil.registerGauge
Expand All @@ -37,19 +38,24 @@ import org.apache.spark.sql.types.{StructField, _}
*/
object FlintJob extends Logging with FlintJobExecutor {
def main(args: Array[String]): Unit = {
CustomLogging.logInfo("Spark Job is Launching...")
// Validate command line arguments
if (args.length != 2) {
throw new IllegalArgumentException("Usage: FlintJob <query> <resultIndex>")
if (args.length != 1) {
throw new IllegalArgumentException("Usage: FlintJob <resultIndex>")
}

val Array(query, resultIndex) = args
val Array(resultIndex) = args

val conf = createSparkConf()
val jobType = conf.get("spark.flint.job.type", "batch")
logInfo(s"""Job type is: ${jobType}""")
conf.set(FlintSparkConf.JOB_TYPE.key, jobType)

val dataSource = conf.get("spark.flint.datasource.name", "")
val query = conf.get(FlintSparkConf.QUERY.key, "")
if (query.isEmpty) {
throw new IllegalArgumentException("Query undefined for the batch job.")
}
// https://github.com/opensearch-project/opensearch-spark/issues/138
/*
* To execute queries such as `CREATE SKIPPING INDEX ON my_glue1.default.http_logs_plain (`@timestamp` VALUE_SET) WITH (auto_refresh = true)`,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import org.opensearch.common.Strings
import org.opensearch.flint.app.{FlintCommand, FlintInstance}
import org.opensearch.flint.app.FlintInstance.formats
import org.opensearch.flint.core.FlintOptions
import org.opensearch.flint.core.logging.CustomLogging
import org.opensearch.flint.core.metrics.MetricConstants
import org.opensearch.flint.core.metrics.MetricsUtil.{decrementCounter, getTimerContext, incrementCounter, registerGauge, stopTimer}
import org.opensearch.flint.core.storage.{FlintReader, OpenSearchUpdater}
Expand Down Expand Up @@ -68,7 +69,8 @@ object FlintREPL extends Logging with FlintJobExecutor {
private val statementRunningCount = new AtomicInteger(0)

def main(args: Array[String]) {
val Array(query, resultIndex) = args
CustomLogging.logInfo("Spark Job is Launching...")
val Array(resultIndex) = args
if (Strings.isNullOrEmpty(resultIndex)) {
throw new IllegalArgumentException("resultIndex is not set")
}
Expand All @@ -91,6 +93,10 @@ object FlintREPL extends Logging with FlintJobExecutor {
conf.set(FlintSparkConf.JOB_TYPE.key, jobType)

if (jobType.equalsIgnoreCase("streaming")) {
val query = conf.get(FlintSparkConf.QUERY.key, "")
if (query.isEmpty) {
throw new IllegalArgumentException("Query undefined for the streaming job.")
}
logInfo(s"""streaming query ${query}""")
val streamingRunningCount = new AtomicInteger(0)
val jobOperator =
Expand Down

0 comments on commit 35c0711

Please sign in to comment.