diff --git a/spark-sql-application/src/main/scala/org/apache/spark/sql/FlintREPL.scala b/spark-sql-application/src/main/scala/org/apache/spark/sql/FlintREPL.scala index 256910e94..78314a68b 100644 --- a/spark-sql-application/src/main/scala/org/apache/spark/sql/FlintREPL.scala +++ b/spark-sql-application/src/main/scala/org/apache/spark/sql/FlintREPL.scala @@ -68,18 +68,7 @@ object FlintREPL extends Logging with FlintJobExecutor { private val statementRunningCount = new AtomicInteger(0) def main(args: Array[String]) { - val (queryOption, resultIndex) = args.length match { - case 1 => - (None, args(0)) // Starting from OS 2.13, resultIndex is the only argument - case 2 => - ( - Some(args(0)), - args(1) - ) // Before OS 2.13, there are two arguments, the second one is resultIndex - case _ => - throw new IllegalArgumentException( - "Unsupported number of arguments. Expected 1 or 2 arguments.") - } + val (queryOption, resultIndex) = parseArgs(args) if (Strings.isNullOrEmpty(resultIndex)) { throw new IllegalArgumentException("resultIndex is not set") @@ -102,15 +91,7 @@ object FlintREPL extends Logging with FlintJobExecutor { logInfo(s"""Job type is: ${FlintSparkConf.JOB_TYPE.defaultValue.get}""") conf.set(FlintSparkConf.JOB_TYPE.key, jobType) - val query = queryOption.getOrElse { - if (jobType.equalsIgnoreCase("streaming")) { - val defaultQuery = conf.get(FlintSparkConf.QUERY.key, "") - if (defaultQuery.isEmpty) { - throw new IllegalArgumentException("Query undefined for the streaming job.") - } - defaultQuery - } else "" - } + val query = getQuery(queryOption, jobType, conf) if (jobType.equalsIgnoreCase("streaming")) { logInfo(s"""streaming query ${query}""") @@ -250,6 +231,33 @@ object FlintREPL extends Logging with FlintJobExecutor { } } + def parseArgs(args: Array[String]): (Option[String], String) = { + args.length match { + case 1 => + (None, args(0)) // Starting from OS 2.13, resultIndex is the only argument + case 2 => + ( + Some(args(0)), + args(1) + ) // Before OS 2.13, there are two arguments, the second one is resultIndex + case _ => + throw new IllegalArgumentException( + "Unsupported number of arguments. Expected 1 or 2 arguments.") + } + } + + def getQuery(queryOption: Option[String], jobType: String, conf: SparkConf): String = { + queryOption.getOrElse { + if (jobType.equalsIgnoreCase("streaming")) { + val defaultQuery = conf.get(FlintSparkConf.QUERY.key, "") + if (defaultQuery.isEmpty) { + throw new IllegalArgumentException("Query undefined for the streaming job.") + } + defaultQuery + } else "" + } + } + /** * Sets up a Flint job with exclusion checks based on the job configuration. * diff --git a/spark-sql-application/src/test/scala/org/apache/spark/sql/FlintREPLTest.scala b/spark-sql-application/src/test/scala/org/apache/spark/sql/FlintREPLTest.scala index abae546b6..421457c4e 100644 --- a/spark-sql-application/src/test/scala/org/apache/spark/sql/FlintREPLTest.scala +++ b/spark-sql-application/src/test/scala/org/apache/spark/sql/FlintREPLTest.scala @@ -30,6 +30,7 @@ import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.trees.Origin +import org.apache.spark.sql.flint.config.FlintSparkConf import org.apache.spark.sql.types.{LongType, NullType, StringType, StructField, StructType} import org.apache.spark.sql.util.{DefaultThreadPoolFactory, MockThreadPoolFactory, MockTimeProvider, RealTimeProvider, ShutdownHookManagerTrait} import org.apache.spark.util.ThreadUtils @@ -42,6 +43,78 @@ class FlintREPLTest // By using a type alias and casting, I can bypass the type checking error. type AnyScheduledFuture = ScheduledFuture[_] + test( + "parseArgs with one argument should return None for query and the argument as resultIndex") { + val args = Array("resultIndexName") + val (queryOption, resultIndex) = FlintREPL.parseArgs(args) + queryOption shouldBe None + resultIndex shouldBe "resultIndexName" + } + + test( + "parseArgs with two arguments should return the first argument as query and the second as resultIndex") { + val args = Array("SELECT * FROM table", "resultIndexName") + val (queryOption, resultIndex) = FlintREPL.parseArgs(args) + queryOption shouldBe Some("SELECT * FROM table") + resultIndex shouldBe "resultIndexName" + } + + test( + "parseArgs with no arguments should throw IllegalArgumentException with specific message") { + val args = Array.empty[String] + val exception = intercept[IllegalArgumentException] { + FlintREPL.parseArgs(args) + } + exception.getMessage shouldBe "Unsupported number of arguments. Expected 1 or 2 arguments." + } + + test( + "parseArgs with more than two arguments should throw IllegalArgumentException with specific message") { + val args = Array("arg1", "arg2", "arg3") + val exception = intercept[IllegalArgumentException] { + FlintREPL.parseArgs(args) + } + exception.getMessage shouldBe "Unsupported number of arguments. Expected 1 or 2 arguments." + } + + test("getQuery should return query from queryOption if present") { + val queryOption = Some("SELECT * FROM table") + val jobType = "streaming" + val conf = new SparkConf() + + val query = FlintREPL.getQuery(queryOption, jobType, conf) + query shouldBe "SELECT * FROM table" + } + + test("getQuery should return default query for streaming job if queryOption is None") { + val queryOption = None + val jobType = "streaming" + val conf = new SparkConf().set(FlintSparkConf.QUERY.key, "SELECT * FROM table") + + val query = FlintREPL.getQuery(queryOption, jobType, conf) + query shouldBe "SELECT * FROM table" + } + + test( + "getQuery should throw IllegalArgumentException if queryOption is None and default query is not defined for streaming job") { + val queryOption = None + val jobType = "streaming" + val conf = new SparkConf() // Default query not set + + intercept[IllegalArgumentException] { + FlintREPL.getQuery(queryOption, jobType, conf) + }.getMessage shouldBe "Query undefined for the streaming job." + } + + test("getQuery should return empty string for non-streaming job if queryOption is None") { + val queryOption = None + val jobType = "interactive" + val conf = new SparkConf() // Default query not needed + + val query = FlintREPL.getQuery(queryOption, jobType, conf) + query shouldBe "" + } + test("createHeartBeatUpdater should update heartbeat correctly") { // Mocks val flintSessionUpdater = mock[OpenSearchUpdater]