From 41e682f8db789a764b6ba8da15e831da76f18ce6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 24 Apr 2024 20:24:04 +0000 Subject: [PATCH] Use EMR serverless bundled iceberg JAR. (#2632) Instead of downloading the JAR from Maven, the JAR in the EMR serverless root file system can be used. Signed-off-by: Adi Suresh (cherry picked from commit e578a57f845c7aff7905c3cdc7288d02fda24f56) Signed-off-by: github-actions[bot] --- .../spark/asyncquery/model/SparkSubmitParameters.java | 9 ++------- .../sql/spark/data/constants/SparkConstants.java | 3 ++- .../sql/spark/dispatcher/SparkQueryDispatcherTest.java | 4 ++-- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java b/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java index 3942c9a772..e400e0a9ea 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java +++ b/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java @@ -54,15 +54,10 @@ private Builder() { config.put( HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY, DEFAULT_GLUE_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY); + config.put(SPARK_JARS_KEY, ICEBERG_SPARK_RUNTIME_PACKAGE); config.put( SPARK_JAR_PACKAGES_KEY, - SPARK_STANDALONE_PACKAGE - + "," - + SPARK_LAUNCHER_PACKAGE - + "," - + PPL_STANDALONE_PACKAGE - + "," - + ICEBERG_SPARK_RUNTIME_PACKAGE); + SPARK_STANDALONE_PACKAGE + "," + SPARK_LAUNCHER_PACKAGE + "," + PPL_STANDALONE_PACKAGE); config.put(SPARK_JAR_REPOSITORIES_KEY, AWS_SNAPSHOT_REPOSITORY); config.put(SPARK_DRIVER_ENV_JAVA_HOME_KEY, JAVA_HOME_LOCATION); config.put(SPARK_EXECUTOR_ENV_JAVA_HOME_KEY, JAVA_HOME_LOCATION); diff --git a/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java b/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java index 0a574ef730..507b774a14 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java +++ b/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java @@ -41,6 +41,7 @@ public class SparkConstants { public static final String HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY = "spark.hadoop.aws.catalog.credentials.provider.factory.class"; public static final String HIVE_METASTORE_GLUE_ARN_KEY = "spark.hive.metastore.glue.role.arn"; + public static final String SPARK_JARS_KEY = "spark.jars"; public static final String SPARK_JAR_PACKAGES_KEY = "spark.jars.packages"; public static final String SPARK_JAR_REPOSITORIES_KEY = "spark.jars.repositories"; public static final String SPARK_DRIVER_ENV_JAVA_HOME_KEY = @@ -101,7 +102,7 @@ public class SparkConstants { public static final String ICEBERG_SPARK_EXTENSION = "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"; public static final String ICEBERG_SPARK_RUNTIME_PACKAGE = - "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.5.0"; + "/usr/share/aws/iceberg/lib/iceberg-spark3-runtime.jar"; public static final String SPARK_CATALOG_CATALOG_IMPL = "spark.sql.catalog.spark_catalog.catalog-impl"; public static final String ICEBERG_GLUE_CATALOG = "org.apache.iceberg.aws.glue.GlueCatalog"; diff --git a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java b/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java index 1f250a0aea..3bec6edcdb 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java +++ b/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java @@ -949,8 +949,8 @@ private String constructExpectedSparkSubmitParameterString( + " spark.hadoop.fs.s3.customAWSCredentialsProvider=com.amazonaws.emr.AssumeRoleAWSCredentialsProvider" + " --conf" + " spark.hadoop.aws.catalog.credentials.provider.factory.class=com.amazonaws.glue.catalog.metastore.STSAssumeRoleSessionCredentialsProviderFactory" - + " --conf" - + " spark.jars.packages=org.opensearch:opensearch-spark-standalone_2.12:0.3.0-SNAPSHOT,org.opensearch:opensearch-spark-sql-application_2.12:0.3.0-SNAPSHOT,org.opensearch:opensearch-spark-ppl_2.12:0.3.0-SNAPSHOT,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.5.0" + + " --conf spark.jars=/usr/share/aws/iceberg/lib/iceberg-spark3-runtime.jar --conf" + + " spark.jars.packages=org.opensearch:opensearch-spark-standalone_2.12:0.3.0-SNAPSHOT,org.opensearch:opensearch-spark-sql-application_2.12:0.3.0-SNAPSHOT,org.opensearch:opensearch-spark-ppl_2.12:0.3.0-SNAPSHOT" + " --conf" + " spark.jars.repositories=https://aws.oss.sonatype.org/content/repositories/snapshots" + " --conf"