Move query from entry point to SparkConf #274

Merged · 3 commits · Mar 14, 2024
FlintSparkConf.scala
@@ -150,6 +150,10 @@ object FlintSparkConf {
     FlintConfig(s"spark.flint.datasource.name")
       .doc("data source name")
       .createOptional()
+  val QUERY =
+    FlintConfig("spark.flint.job.query")
+      .doc("Flint query for batch and streaming job")
+      .createOptional()
   val JOB_TYPE =
     FlintConfig(s"spark.flint.job.type")
       .doc("Flint job type. Including interactive and streaming")
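
With the new setting in place, entry points read the query from SparkConf instead of argv. A minimal consumer-side sketch — the key and the empty-string default come from the diffs in this PR; the conf construction itself is illustrative:

```scala
import org.apache.spark.SparkConf

// Illustrative only: a launcher now carries the query as a Spark conf
// entry under the key introduced above, not as a program argument.
val conf = new SparkConf()
  .set("spark.flint.job.query", "SELECT 1") // FlintSparkConf.QUERY.key
  .set("spark.flint.job.type", "batch")

// Entry points read it back with an empty default and validate,
// as FlintJob does below.
val query = conf.get("spark.flint.job.query", "")
if (query.isEmpty) {
  throw new IllegalArgumentException("Query undefined for the batch job.")
}
```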
FlintREPLITSuite.scala
@@ -168,7 +168,7 @@ class FlintREPLITSuite extends SparkFunSuite with OpenSearchSuite with JobTest {
       Map("SERVERLESS_EMR_JOB_ID" -> jobRunId, "SERVERLESS_EMR_VIRTUAL_CLUSTER_ID" -> appId))
     FlintREPL.enableHiveSupport = false
     FlintREPL.terminateJVM = false
-    FlintREPL.main(Array("select 1", resultIndex))
+    FlintREPL.main(Array(resultIndex))
   }
   futureResult.onComplete {
     case Success(result) => logInfo(s"Success result: $result")
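
Since FlintREPL.main no longer takes the query as its first argument, the test has to seed it through configuration before invoking main. A hedged sketch of one way to do that, assuming the REPL's SparkConf is built with defaults loaded so that spark.* JVM system properties surface as conf entries (standard SparkConf behavior; the exact mechanism this PR's test uses is not shown here):

```scala
// Assumption: FlintREPL constructs its SparkConf with loadDefaults = true,
// so a spark.* system property set by the test shows up as a conf entry.
System.setProperty("spark.flint.job.query", "select 1")
FlintREPL.main(Array(resultIndex)) // resultIndex comes from the test fixture
```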
FlintJob.scala
@@ -14,6 +14,7 @@ import org.opensearch.cluster.metadata.MappingMetadata
 import org.opensearch.common.settings.Settings
 import org.opensearch.common.xcontent.XContentType
 import org.opensearch.flint.core.{FlintClient, FlintClientBuilder, FlintOptions}
+import org.opensearch.flint.core.logging.CustomLogging
 import org.opensearch.flint.core.metadata.FlintMetadata
 import org.opensearch.flint.core.metrics.MetricConstants
 import org.opensearch.flint.core.metrics.MetricsUtil.registerGauge
@@ -37,19 +38,24 @@ import org.apache.spark.sql.types.{StructField, _}
  */
 object FlintJob extends Logging with FlintJobExecutor {
   def main(args: Array[String]): Unit = {
+    CustomLogging.logInfo("Spark Job is Launching...")
     // Validate command line arguments
-    if (args.length != 2) {
-      throw new IllegalArgumentException("Usage: FlintJob <query> <resultIndex>")
+    if (args.length != 1) {
+      throw new IllegalArgumentException("Usage: FlintJob <resultIndex>")
     }

-    val Array(query, resultIndex) = args
+    val Array(resultIndex) = args

     val conf = createSparkConf()
     val jobType = conf.get("spark.flint.job.type", "batch")
     logInfo(s"""Job type is: ${jobType}""")
     conf.set(FlintSparkConf.JOB_TYPE.key, jobType)

     val dataSource = conf.get("spark.flint.datasource.name", "")
+    val query = conf.get(FlintSparkConf.QUERY.key, "")
+    if (query.isEmpty) {
+      throw new IllegalArgumentException("Query undefined for the batch job.")
+    }
     // https://github.com/opensearch-project/opensearch-spark/issues/138
     /*
      * To execute queries such as `CREATE SKIPPING INDEX ON my_glue1.default.http_logs_plain (`@timestamp` VALUE_SET) WITH (auto_refresh = true)`,
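
On the submission side, a batch job would now be launched with the query as a conf entry and only <resultIndex> in the argument list. A sketch using Spark's SparkLauncher — the jar path, main-class package, and index name are assumptions for illustration:

```scala
import org.apache.spark.launcher.SparkLauncher

// Hypothetical submission after this change: the query travels as a
// Spark conf entry; argv carries only the result index.
val handle = new SparkLauncher()
  .setAppResource("/path/to/spark-sql-application.jar") // assumed artifact
  .setMainClass("org.apache.spark.sql.FlintJob")        // assumed package
  .setConf("spark.flint.job.query", "SELECT * FROM my_glue1.default.http_logs_plain LIMIT 10")
  .setConf("spark.flint.job.type", "batch")
  .addAppArgs("query_results_index")                    // <resultIndex>
  .startApplication()
```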
FlintREPL.scala
@@ -21,6 +21,7 @@ import org.opensearch.common.Strings
 import org.opensearch.flint.app.{FlintCommand, FlintInstance}
 import org.opensearch.flint.app.FlintInstance.formats
 import org.opensearch.flint.core.FlintOptions
+import org.opensearch.flint.core.logging.CustomLogging
 import org.opensearch.flint.core.metrics.MetricConstants
 import org.opensearch.flint.core.metrics.MetricsUtil.{decrementCounter, getTimerContext, incrementCounter, registerGauge, stopTimer}
 import org.opensearch.flint.core.storage.{FlintReader, OpenSearchUpdater}
@@ -68,7 +69,8 @@ object FlintREPL extends Logging with FlintJobExecutor {
   private val statementRunningCount = new AtomicInteger(0)

   def main(args: Array[String]) {
-    val Array(query, resultIndex) = args
+    CustomLogging.logInfo("Spark Job is Launching...")
+    val Array(resultIndex) = args
     if (Strings.isNullOrEmpty(resultIndex)) {
       throw new IllegalArgumentException("resultIndex is not set")
     }
@@ -91,6 +93,10 @@ object FlintREPL extends Logging with FlintJobExecutor {
     conf.set(FlintSparkConf.JOB_TYPE.key, jobType)

     if (jobType.equalsIgnoreCase("streaming")) {
+      val query = conf.get(FlintSparkConf.QUERY.key, "")
+      if (query.isEmpty) {
+        throw new IllegalArgumentException("Query undefined for the streaming job.")
+      }
       logInfo(s"""streaming query ${query}""")
       val streamingRunningCount = new AtomicInteger(0)
       val jobOperator =
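
Note that FlintREPL requires spark.flint.job.query only when the job type is streaming; interactive sessions receive statements through the REPL channel instead. A minimal sketch of the conf a streaming job now expects — keys are the ones used above, and the query value mirrors the auto-refresh example referenced in FlintJob:

```scala
import org.apache.spark.SparkConf

// Keys are from this PR; values are illustrative.
val conf = new SparkConf()
  .set("spark.flint.job.type", "streaming")
  .set(
    "spark.flint.job.query",
    "CREATE SKIPPING INDEX ON my_glue1.default.http_logs_plain (`@timestamp` VALUE_SET) WITH (auto_refresh = true)")
// With jobType == "streaming" and a non-empty query, the guard above
// passes and the streaming JobOperator is constructed.
```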