Move query from entry point to SparkConf #274

Merged · 3 commits · Mar 14, 2024
FlintSparkConf.scala
@@ -150,6 +150,10 @@ object FlintSparkConf {
     FlintConfig(s"spark.flint.datasource.name")
       .doc("data source name")
       .createOptional()
+  val QUERY =
+    FlintConfig("spark.flint.job.query")
+      .doc("Flint query for batch and streaming job")
+      .createOptional()
   val JOB_TYPE =
     FlintConfig(s"spark.flint.job.type")
       .doc("Flint job type. Including interactive and streaming")
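
With the new setting in place, entry points read the query from SparkConf instead of argv. A minimal consumer-side sketch — the key and the empty-string default come from the diffs in this PR; the conf construction itself is illustrative:

```scala
import org.apache.spark.SparkConf

// Illustrative only: a launcher now carries the query as a Spark conf
// entry under the key introduced above, not as a program argument.
val conf = new SparkConf()
  .set("spark.flint.job.query", "SELECT 1") // FlintSparkConf.QUERY.key
  .set("spark.flint.job.type", "batch")

// Entry points read it back with an empty default and validate,
// as FlintJob does below.
val query = conf.get("spark.flint.job.query", "")
if (query.isEmpty) {
  throw new IllegalArgumentException("Query undefined for the batch job.")
}
```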
FlintREPLITSuite.scala
@@ -168,7 +168,7 @@ class FlintREPLITSuite extends SparkFunSuite with OpenSearchSuite with JobTest {
       Map("SERVERLESS_EMR_JOB_ID" -> jobRunId, "SERVERLESS_EMR_VIRTUAL_CLUSTER_ID" -> appId))
     FlintREPL.enableHiveSupport = false
     FlintREPL.terminateJVM = false
-    FlintREPL.main(Array("select 1", resultIndex))
+    FlintREPL.main(Array(resultIndex))
   }
   futureResult.onComplete {
     case Success(result) => logInfo(s"Success result: $result")
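
Since FlintREPL.main no longer takes the query as its first argument, the test has to seed it through configuration before invoking main. A hedged sketch of one way to do that, assuming the REPL's SparkConf is built with defaults loaded so that spark.* JVM system properties surface as conf entries (standard SparkConf behavior; the exact mechanism this PR's test uses is not shown here):

```scala
// Assumption: FlintREPL constructs its SparkConf with loadDefaults = true,
// so a spark.* system property set by the test shows up as a conf entry.
System.setProperty("spark.flint.job.query", "select 1")
FlintREPL.main(Array(resultIndex)) // resultIndex comes from the test fixture
```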
FlintJob.scala
@@ -14,6 +14,7 @@ import org.opensearch.cluster.metadata.MappingMetadata
 import org.opensearch.common.settings.Settings
 import org.opensearch.common.xcontent.XContentType
 import org.opensearch.flint.core.{FlintClient, FlintClientBuilder, FlintOptions}
+import org.opensearch.flint.core.logging.CustomLogging
 import org.opensearch.flint.core.metadata.FlintMetadata
 import org.opensearch.flint.core.metrics.MetricConstants
 import org.opensearch.flint.core.metrics.MetricsUtil.registerGauge
@@ -37,19 +38,24 @@ import org.apache.spark.sql.types.{StructField, _}
  */
 object FlintJob extends Logging with FlintJobExecutor {
   def main(args: Array[String]): Unit = {
+    CustomLogging.logInfo("Spark Job is Launching...")
     // Validate command line arguments
-    if (args.length != 2) {
-      throw new IllegalArgumentException("Usage: FlintJob <query> <resultIndex>")
+    if (args.length != 1) {
+      throw new IllegalArgumentException("Usage: FlintJob <resultIndex>")
     }

-    val Array(query, resultIndex) = args
+    val Array(resultIndex) = args

     val conf = createSparkConf()
     val jobType = conf.get("spark.flint.job.type", "batch")
     logInfo(s"""Job type is: ${jobType}""")
     conf.set(FlintSparkConf.JOB_TYPE.key, jobType)

     val dataSource = conf.get("spark.flint.datasource.name", "")
+    val query = conf.get(FlintSparkConf.QUERY.key, "")
+    if (query.isEmpty) {
+      throw new IllegalArgumentException("Query undefined for the batch job.")
+    }
     // https://github.com/opensearch-project/opensearch-spark/issues/138
     /*
      * To execute queries such as `CREATE SKIPPING INDEX ON my_glue1.default.http_logs_plain (`@timestamp` VALUE_SET) WITH (auto_refresh = true)`,
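
On the submission side, a batch job would now be launched with the query as a conf entry and only <resultIndex> in the argument list. A sketch using Spark's SparkLauncher — the jar path, main-class package, and index name are assumptions for illustration:

```scala
import org.apache.spark.launcher.SparkLauncher

// Hypothetical submission after this change: the query travels as a
// Spark conf entry; argv carries only the result index.
val handle = new SparkLauncher()
  .setAppResource("/path/to/spark-sql-application.jar") // assumed artifact
  .setMainClass("org.apache.spark.sql.FlintJob")        // assumed package
  .setConf("spark.flint.job.query", "SELECT * FROM my_glue1.default.http_logs_plain LIMIT 10")
  .setConf("spark.flint.job.type", "batch")
  .addAppArgs("query_results_index")                    // <resultIndex>
  .startApplication()
```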
FlintREPL.scala
@@ -21,6 +21,7 @@ import org.opensearch.common.Strings
 import org.opensearch.flint.app.{FlintCommand, FlintInstance}
 import org.opensearch.flint.app.FlintInstance.formats
 import org.opensearch.flint.core.FlintOptions
+import org.opensearch.flint.core.logging.CustomLogging
 import org.opensearch.flint.core.metrics.MetricConstants
 import org.opensearch.flint.core.metrics.MetricsUtil.{decrementCounter, getTimerContext, incrementCounter, registerGauge, stopTimer}
 import org.opensearch.flint.core.storage.{FlintReader, OpenSearchUpdater}
@@ -68,7 +69,8 @@ object FlintREPL extends Logging with FlintJobExecutor {
   private val statementRunningCount = new AtomicInteger(0)

   def main(args: Array[String]) {
-    val Array(query, resultIndex) = args
+    CustomLogging.logInfo("Spark Job is Launching...")
+    val Array(resultIndex) = args
     if (Strings.isNullOrEmpty(resultIndex)) {
       throw new IllegalArgumentException("resultIndex is not set")
     }
@@ -91,6 +93,10 @@ object FlintREPL extends Logging with FlintJobExecutor {
     conf.set(FlintSparkConf.JOB_TYPE.key, jobType)

     if (jobType.equalsIgnoreCase("streaming")) {
+      val query = conf.get(FlintSparkConf.QUERY.key, "")
+      if (query.isEmpty) {
+        throw new IllegalArgumentException("Query undefined for the streaming job.")
+      }
       logInfo(s"""streaming query ${query}""")
       val streamingRunningCount = new AtomicInteger(0)
       val jobOperator =
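
Note that FlintREPL requires spark.flint.job.query only when the job type is streaming; interactive sessions receive statements through the REPL channel instead. A minimal sketch of the conf a streaming job now expects — keys are the ones used above, and the query value mirrors the auto-refresh example referenced in FlintJob:

```scala
import org.apache.spark.SparkConf

// Keys are from this PR; values are illustrative.
val conf = new SparkConf()
  .set("spark.flint.job.type", "streaming")
  .set(
    "spark.flint.job.query",
    "CREATE SKIPPING INDEX ON my_glue1.default.http_logs_plain (`@timestamp` VALUE_SET) WITH (auto_refresh = true)")
// With jobType == "streaming" and a non-empty query, the guard above
// passes and the streaming JobOperator is constructed.
```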