Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unescape query from EMR spark submit parameter #306

Merged
merged 2 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ object FlintJob extends Logging with FlintJobExecutor {
conf.set(FlintSparkConf.JOB_TYPE.key, jobType)

val dataSource = conf.get("spark.flint.datasource.name", "")
val query = queryOption.getOrElse(conf.get(FlintSparkConf.QUERY.key, ""))
val query = queryOption.getOrElse(unescapeQuery(conf.get(FlintSparkConf.QUERY.key, "")))
if (query.isEmpty) {
throw new IllegalArgumentException(s"Query undefined for the ${jobType} job.")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import scala.concurrent.{ExecutionContext, Future, TimeoutException}
import scala.concurrent.duration.{Duration, MINUTES}

import com.amazonaws.services.s3.model.AmazonS3Exception
import org.apache.commons.text.StringEscapeUtils.unescapeJava
import org.opensearch.flint.core.{FlintClient, IRestHighLevelClient}
import org.opensearch.flint.core.metadata.FlintMetadata
import org.opensearch.flint.core.metrics.MetricConstants
Expand Down Expand Up @@ -361,6 +362,14 @@ trait FlintJobExecutor {
}
}

/**
* Unescape the query string which is escaped for EMR spark submit parameter parsing. Ref:
* https://github.com/opensearch-project/sql/pull/2587
*/
def unescapeQuery(query: String): String = {
unescapeJava(query)
}

def executeQuery(
spark: SparkSession,
query: String,
Expand All @@ -371,6 +380,7 @@ trait FlintJobExecutor {
val startTime = System.currentTimeMillis()
// we have to set job group in the same thread that started the query according to spark doc
spark.sparkContext.setJobGroup(queryId, "Job group for " + queryId, interruptOnCancel = true)
logInfo(s"Executing query: $query")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may require sensitive info anonymized like: https://github.com/opensearch-project/sql/blob/2649200e065dff48282dce438ceb0ee5ac39054e/sql/src/main/java/org/opensearch/sql/sql/antlr/AnonymizerListener.java#L34. Probably we can do this later since this is also helpful for analyzing query pattern.

Copy link
Collaborator

@dai-chen dai-chen Apr 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@seankao-az @noCharger could you address this in other 0.3 PR if any? Probably remove it for now?

val result: DataFrame = spark.sql(query)
// Get Data
getFormattedData(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ object FlintREPL extends Logging with FlintJobExecutor {
if (defaultQuery.isEmpty) {
throw new IllegalArgumentException("Query undefined for the streaming job.")
}
defaultQuery
unescapeQuery(defaultQuery)
} else ""
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,18 @@ class FlintREPLTest
query shouldBe "SELECT * FROM table"
}

test(
"getQuery should return unescaped default query for streaming job if queryOption is None") {
val queryOption = None
val jobType = "streaming"
val conf = new SparkConf().set(
FlintSparkConf.QUERY.key,
"SELECT \\\"1\\\" UNION SELECT '\\\"1\\\"' UNION SELECT \\\"\\\\\\\"1\\\\\\\"\\\"")

val query = FlintREPL.getQuery(queryOption, jobType, conf)
query shouldBe "SELECT \"1\" UNION SELECT '\"1\"' UNION SELECT \"\\\"1\\\"\""
}

test(
"getQuery should throw IllegalArgumentException if queryOption is None and default query is not defined for streaming job") {
val queryOption = None
Expand Down
Loading