From bc07e22416e013fe01a99c871426ef12b13330cd Mon Sep 17 00:00:00 2001 From: Louis Chu Date: Thu, 7 Nov 2024 22:42:47 -0800 Subject: [PATCH] Apply shaded rules --- build.sbt | 34 ++++++++++++++++++- .../apache/spark/sql/FlintJobExecutor.scala | 9 +++-- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/build.sbt b/build.sbt index e38dafa64..b5c70c259 100644 --- a/build.sbt +++ b/build.sbt @@ -3,10 +3,11 @@ * SPDX-License-Identifier: Apache-2.0 */ import Dependencies._ +import sbtassembly.AssemblyPlugin.autoImport.ShadeRule lazy val scala212 = "2.12.14" lazy val sparkVersion = "3.5.1" -// Spark jackson version. Spark jackson-module-scala strictly check the jackson-databind version should compatible +// Spark jackson version. Spark jackson-module-scala strictly check the jackson-databind version hould compatbile // https://github.com/FasterXML/jackson-module-scala/blob/2.18/src/main/scala/com/fasterxml/jackson/module/scala/JacksonModule.scala#L59 lazy val jacksonVersion = "2.15.2" @@ -43,7 +44,35 @@ lazy val compileScalastyle = taskKey[Unit]("compileScalastyle") // Run as part of test task. lazy val testScalastyle = taskKey[Unit]("testScalastyle") +// Explanation: +// - ThisBuild / assemblyShadeRules sets the shading rules for the entire build +// - ShadeRule.rename(...) creates a rule to rename multiple package patterns +// - "shaded.@0" means prepend "shaded." to the original package name +// - .inAll applies the rule to all dependencies, not just direct dependencies +val packagesToShade = Seq( + "com.amazonaws.cloudwatch.**", + "com.fasterxml.jackson.core.**", + "com.fasterxml.jackson.dataformat.**", + "com.fasterxml.jackson.databind.**", + "com.sun.jna.**", + "com.thoughtworks.paranamer.**", + "javax.annotation.**", + "org.apache.commons.codec.**", + "org.apache.commons.logging.**", + "org.apache.hc.**", + "org.apache.http.**", + "org.glassfish.json.**", + "org.joda.time.**", + "org.reactivestreams.**", + "org.yaml.**", + "software.amazon.**" +) +ThisBuild / assemblyShadeRules := Seq( + ShadeRule.rename( + packagesToShade.map(_ -> "shaded.flint.@0"): _* + ).inAll +) lazy val commonSettings = Seq( javacOptions ++= Seq("-source", "11"), @@ -89,6 +118,9 @@ lazy val flintCore = (project in file("flint-core")) "com.amazonaws" % "aws-java-sdk-cloudwatch" % "1.12.593" exclude("com.fasterxml.jackson.core", "jackson-databind"), "software.amazon.awssdk" % "auth-crt" % "2.28.10", + "com.fasterxml.jackson.core" % "jackson-core" % jacksonVersion, + "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion, + "org.projectlombok" % "lombok" % "1.18.30" % "provided", "org.scalactic" %% "scalactic" % "3.2.15" % "test", "org.scalatest" %% "scalatest" % "3.2.15" % "test", "org.scalatest" %% "scalatest-flatspec" % "3.2.15" % "test", diff --git a/spark-sql-application/src/main/scala/org/apache/spark/sql/FlintJobExecutor.scala b/spark-sql-application/src/main/scala/org/apache/spark/sql/FlintJobExecutor.scala index c076f9974..8e037a53e 100644 --- a/spark-sql-application/src/main/scala/org/apache/spark/sql/FlintJobExecutor.scala +++ b/spark-sql-application/src/main/scala/org/apache/spark/sql/FlintJobExecutor.scala @@ -10,7 +10,6 @@ import java.util.Locale import com.amazonaws.services.glue.model.{AccessDeniedException, AWSGlueException} import com.amazonaws.services.s3.model.AmazonS3Exception import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.commons.text.StringEscapeUtils.unescapeJava import org.opensearch.common.Strings import org.opensearch.flint.core.IRestHighLevelClient @@ -45,7 +44,6 @@ trait FlintJobExecutor { this: Logging => val mapper = new ObjectMapper() - mapper.registerModule(DefaultScalaModule) var currentTimeProvider: TimeProvider = new RealTimeProvider() var threadPoolFactory: ThreadPoolFactory = new DefaultThreadPoolFactory() @@ -442,9 +440,10 @@ trait FlintJobExecutor { errorSource: Option[String] = None, statusCode: Option[Int] = None): String = { val errorMessage = s"$messagePrefix: ${e.getMessage}" - val errorDetails = Map("Message" -> errorMessage) ++ - errorSource.map("ErrorSource" -> _) ++ - statusCode.map(code => "StatusCode" -> code.toString) + val errorDetails = new java.util.LinkedHashMap[String, String]() + errorDetails.put("Message", errorMessage) + errorSource.foreach(es => errorDetails.put("ErrorSource", es)) + statusCode.foreach(code => errorDetails.put("StatusCode", code.toString)) val errorJson = mapper.writeValueAsString(errorDetails)