Add some instructions to use Spark EMR docker image (#965)
* Add some instructions to use Spark EMR docker image
* Also included some sample files
* Added instructions for using Bitnami Spark images
* Added docker compose files for EMR

Signed-off-by: Norman Jordan <[email protected]>
1 parent 62b6c5f · commit 16fbfea
Showing 16 changed files with 647 additions and 0 deletions.
@@ -0,0 +1,4 @@
MASTER_UI_PORT=8080
MASTER_PORT=7077
UI_PORT=4040
PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
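PPL_JAR must point at a locally built assembly jar before the compose file can mount it. A minimal sketch of producing that jar, assuming the PPL integration is built with sbt-assembly (the output path below is this .env default; adjust the version suffix to match your build):

# From the repository root; produces the assembly jar referenced by PPL_JAR
sbt clean assembly
ls ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar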
@@ -0,0 +1,41 @@
services:
  spark:
    image: bitnami/spark:3.5.3
    ports:
      - "${MASTER_UI_PORT:-8080}:8080"
      - "${MASTER_PORT:-7077}:7077"
      - "${UI_PORT:-4040}:4040"
    environment:
      - SPARK_MODE=master
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
      - SPARK_PUBLIC_DNS=localhost
    volumes:
      - type: bind
        source: ./spark-defaults.conf
        target: /opt/bitnami/spark/conf/spark-defaults.conf
      - type: bind
        source: $PPL_JAR
        target: /opt/bitnami/spark/jars/ppl-spark-integration.jar

  spark-worker:
    image: bitnami/spark:3.5.3
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark:7077
      - SPARK_WORKER_MEMORY=${WORKER_MEMORY:-1G}
      - SPARK_WORKER_CORES=${WORKER_CORES:-1}
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
      - SPARK_PUBLIC_DNS=localhost
    volumes:
      - type: bind
        source: ./spark-defaults.conf
        target: /opt/bitnami/spark/conf/spark-defaults.conf
      - type: bind
        source: $PPL_JAR
        target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
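A short sketch of bringing this standalone cluster up, assuming the compose file sits alongside the .env and spark-defaults.conf shown here:

# Start the master and worker containers in the background
docker compose up -d
# The master web UI is then published on MASTER_UI_PORT (8080 by default)
docker compose logs -f spark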
@@ -0,0 +1,29 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
# spark.master                     spark://master:7077
# spark.eventLog.enabled           true
# spark.eventLog.dir               hdfs://namenode:8021/directory
# spark.serializer                 org.apache.spark.serializer.KryoSerializer
# spark.driver.memory              5g
# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
spark.sql.extensions org.opensearch.flint.spark.FlintPPLSparkExtensions
spark.sql.catalog.dev org.apache.spark.opensearch.catalog.OpenSearchCatalog
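With FlintPPLSparkExtensions registered, PPL statements go through the regular spark.sql entry point. A hedged sketch of trying it out, assuming the service name "spark" from the compose file above:

# Open a Spark shell inside the running master container
docker compose exec spark spark-shell
# inside the shell, a PPL query runs through spark.sql, for example:
#   spark.sql("source=myTable | fields id").show()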
@@ -0,0 +1 @@
PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
@@ -0,0 +1,17 @@
services:
  spark-emr:
    image: public.ecr.aws/emr-serverless/spark/emr-7.5.0:20241125
    volumes:
      - type: bind
        source: ./logging-conf
        target: /var/loggingConfiguration/spark
      - type: bind
        source: ../spark-sample-app/target/scala-2.12
        target: /app
      - type: bind
        source: ./spark-conf
        target: /etc/spark/conf
      - type: bind
        source: ${PPL_JAR}
        target: /usr/lib/spark/jars/ppl-spark-integration.jar
    command: driver --class MyApp /app/myapp_2.12-1.0.jar
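A sketch of running the sample job with this file, assuming PPL_JAR is set as in the .env above and the sample app jar has already been built (see the build.sbt further down):

# Runs the EMR Serverless Spark image as a driver; the container exits when MyApp finishes
docker compose up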
@@ -0,0 +1,3 @@
#!/bin/bash

# Do nothing as default logging is sufficient
@@ -0,0 +1,3 @@
#!/bin/bash

# Do nothing as default logging is sufficient
@@ -0,0 +1,25 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Licensed to the Apache Software Foundation (ASF) under one or more      -->
<!-- contributor license agreements. See the NOTICE file distributed with    -->
<!-- this work for additional information regarding copyright ownership.     -->
<!-- The ASF licenses this file to You under the Apache License, Version 2.0 -->
<!-- (the "License"); you may not use this file except in compliance with    -->
<!-- the License. You may obtain a copy of the License at                    -->
<!--                                                                         -->
<!--     http://www.apache.org/licenses/LICENSE-2.0                          -->
<!--                                                                         -->
<!-- Unless required by applicable law or agreed to in writing, software     -->
<!-- distributed under the License is distributed on an "AS IS" BASIS,       -->
<!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.-->
<!-- See the License for the specific language governing permissions and     -->
<!-- limitations under the License.                                          -->

<configuration>

  <property>
    <name>hive.metastore.connect.retries</name>
    <value>15</value>
  </property>
</configuration>
@@ -0,0 +1,74 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This property will be overridden for JVMs running inside YARN containers.
# Other log4j configurations may reference the property, for example, in order to
# cause a log file to appear in the usual log directory for the YARN container,
# so that LogPusher will upload it to S3. The following provides a default value
# to be used for this property such that logs are still written to a valid location
# even for Spark processes run *outside* of a YARN container (e.g., a Spark
# driver run in client deploy-mode).
spark.yarn.app.container.log.dir=/var/log/spark/user/${user.name}

# Set everything to be logged to the console
rootLogger.level = info
rootLogger.appenderRef.stdout.ref = console

appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Set the default spark-shell/spark-sql log level to WARN. When running the
# spark-shell/spark-sql, the log level for these classes is used to overwrite
# the root logger's log level, so that the user can have different defaults
# for the shell and regular Spark apps.
logger.repl.name = org.apache.spark.repl.Main
logger.repl.level = warn

logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver
logger.thriftserver.level = warn

# Settings to quiet third party logs that are too verbose
logger.jetty1.name = org.sparkproject.jetty
logger.jetty1.level = warn
logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
logger.jetty2.level = error
logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.replexprTyper.level = info
logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.replSparkILoopInterpreter.level = info
logger.parquet1.name = org.apache.parquet
logger.parquet1.level = error
logger.parquet2.name = parquet
logger.parquet2.level = error
logger.hudi.name = org.apache.hudi
logger.hudi.level = warn

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.RetryingHMSHandler.level = fatal
logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.FunctionRegistry.level = error

# For deploying Spark ThriftServer
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
appender.console.filter.1.type = RegexFilter
appender.console.filter.1.regex = .*Thrift error occurred during processing of message.*
appender.console.filter.1.onMatch = deny
appender.console.filter.1.onMismatch = neutral
Empty file.
@@ -0,0 +1,65 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

spark.driver.extraClassPath /usr/lib/livy/rsc-jars/*:/usr/lib/livy/repl_2.12-jars/*:/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/goodies/lib/emr-serverless-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/docker/usr/lib/hadoop-lzo/lib/*:/docker/usr/lib/hadoop/hadoop-aws.jar:/docker/usr/share/aws/aws-java-sdk/*:/docker/usr/share/aws/emr/emrfs/conf:/docker/usr/share/aws/emr/emrfs/lib/*:/docker/usr/share/aws/emr/emrfs/auxlib/*:/docker/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/docker/usr/share/aws/emr/security/conf:/docker/usr/share/aws/emr/security/lib/*:/docker/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/docker/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/docker/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/docker/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/usr/share/aws/redshift/jdbc/RedshiftJDBC.jar:/usr/share/aws/redshift/spark-redshift/lib/*:/usr/share/aws/iceberg/lib/iceberg-emr-common.jar:/usr/share/aws/iceberg/lib/iceberg-spark3-runtime.jar
spark.driver.extraLibraryPath /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native:/docker/usr/lib/hadoop/lib/native:/docker/usr/lib/hadoop-lzo/lib/native
spark.executor.extraClassPath /usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/goodies/lib/emr-serverless-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/docker/usr/lib/hadoop-lzo/lib/*:/docker/usr/lib/hadoop/hadoop-aws.jar:/docker/usr/share/aws/aws-java-sdk/*:/docker/usr/share/aws/emr/emrfs/conf:/docker/usr/share/aws/emr/emrfs/lib/*:/docker/usr/share/aws/emr/emrfs/auxlib/*:/docker/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/docker/usr/share/aws/emr/security/conf:/docker/usr/share/aws/emr/security/lib/*:/docker/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/docker/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/docker/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/docker/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar:/usr/share/aws/redshift/jdbc/RedshiftJDBC.jar:/usr/share/aws/redshift/spark-redshift/lib/*:/usr/share/aws/iceberg/lib/iceberg-emr-common.jar:/usr/share/aws/iceberg/lib/iceberg-spark3-runtime.jar
spark.executor.extraLibraryPath /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native:/docker/usr/lib/hadoop/lib/native:/docker/usr/lib/hadoop-lzo/lib/native
spark.eventLog.enabled true
spark.eventLog.dir file:///var/log/spark/apps
spark.history.fs.logDirectory file:///var/log/spark/apps
spark.history.ui.port 18080
spark.blacklist.decommissioning.enabled true
spark.blacklist.decommissioning.timeout 1h
spark.resourceManager.cleanupExpiredHost true
spark.stage.attempt.ignoreOnDecommissionFetchFailure true
spark.decommissioning.timeout.threshold 20
spark.files.fetchFailure.unRegisterOutputOnHost true
spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version.emr_internal_use_only.EmrFileSystem 2
spark.hadoop.mapreduce.fileoutputcommitter.cleanup-failures.ignored.emr_internal_use_only.EmrFileSystem true
spark.hadoop.fs.s3.getObject.initialSocketTimeoutMilliseconds 2000
spark.sql.parquet.output.committer.class com.amazon.emr.committer.EmrOptimizedSparkSqlParquetOutputCommitter
spark.sql.parquet.fs.optimized.committer.optimization-enabled true
spark.sql.emr.internal.extensions com.amazonaws.emr.spark.EmrSparkSessionExtensions
spark.executor.memory 14G
spark.executor.cores 4
spark.driver.memory 14G
spark.driver.cores 4
spark.executor.defaultJavaOptions -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70 -XX:OnOutOfMemoryError='kill -9 %p'
spark.driver.defaultJavaOptions -XX:OnOutOfMemoryError='kill -9 %p'
spark.hadoop.mapreduce.output.fs.optimized.committer.enabled true

spark.master custom:emr-serverless
spark.submit.deployMode client
spark.submit.customResourceManager.submit.class org.apache.spark.deploy.emrserverless.submit.EmrServerlessClientApplication
spark.hadoop.fs.defaultFS file:///
spark.dynamicAllocation.enabled true
spark.dynamicAllocation.shuffleTracking.enabled true
spark.hadoop.fs.s3.customAWSCredentialsProvider com.amazonaws.auth.DefaultAWSCredentialsProviderChain
spark.authenticate true
spark.ui.enabled false
spark.ui.custom.executor.log.url /logs/{{CONTAINER_ID}}/{{FILE_NAME}}.gz

spark.emr-serverless.client.create.batch.size 100
spark.emr-serverless.client.describe.batch.size 100
spark.emr-serverless.client.release.batch.size 100
spark.dynamicAllocation.initialExecutors 3
spark.dynamicAllocation.minExecutors 0
spark.executor.instances 3
spark.hadoop.fs.s3a.aws.credentials.provider software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider
spark.sql.hive.metastore.sharedPrefixes software.amazon.awssdk.services.dynamodb
spark.sql.legacy.createHiveTableByDefault false
spark.sql.extensions org.opensearch.flint.spark.FlintPPLSparkExtensions
spark.sql.catalog.dev org.apache.spark.opensearch.catalog.OpenSearchCatalog
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark}
export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark}
export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}
export HIVE_CONF_DIR=${HIVE_CONF_DIR:-/etc/hive/conf}

export SPARK_MASTER_PORT=7077
export SPARK_MASTER_IP=$STANDALONE_SPARK_MASTER_HOST
export SPARK_MASTER_WEBUI_PORT=8080

export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work}
export SPARK_WORKER_PORT=7078
export SPARK_WORKER_WEBUI_PORT=8081

export HIVE_SERVER2_THRIFT_BIND_HOST=0.0.0.0
export HIVE_SERVER2_THRIFT_PORT=10001

export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -XX:OnOutOfMemoryError='kill -9 %p'"
export PYSPARK_PYTHON=${PYSPARK_PYTHON:-/usr/bin/python3}
export PYSPARK_DRIVER_PYTHON=${PYSPARK_DRIVER_PYTHON:-/usr/bin/python3}

export AWS_STS_REGIONAL_ENDPOINTS=regional
@@ -0,0 +1,8 @@
name := "MyApp"

version := "1.0"

scalaVersion := "2.12.20"

libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.5.3"
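The EMR compose file above mounts ../spark-sample-app/target/scala-2.12 into /app and runs /app/myapp_2.12-1.0.jar, so the sample app has to be packaged first. A minimal sketch, assuming sbt is installed:

# From the spark-sample-app directory; produces target/scala-2.12/myapp_2.12-1.0.jar
sbt package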
@@ -0,0 +1,27 @@
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

import org.apache.spark.sql.SparkSession

object MyApp {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("MyApp")
      .getOrCreate()

    println("APP Name :" + spark.sparkContext.appName)
    println("Deploy Mode :" + spark.sparkContext.deployMode)
    println("Master :" + spark.sparkContext.master)

    spark.sql("CREATE table foo (id int, name varchar(100))").show()
    println(">>> Table created")
    spark.sql("SELECT * FROM foo").show()
    println(">>> SQL query of table completed")

    spark.sql("source=foo | fields id").show()
    println(">>> PPL query of table completed")
  }
}
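The final statement is a PPL query rather than SQL; it only parses because spark-defaults.conf registers FlintPPLSparkExtensions. For reference, a hedged sketch of submitting the app against a local Spark installation instead of the EMR container (the jar paths are the build outputs assumed above, relative to spark-sample-app):

# spark-submit with the PPL assembly on the classpath and the extension enabled
spark-submit --class MyApp \
  --jars ../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar \
  --conf spark.sql.extensions=org.opensearch.flint.spark.FlintPPLSparkExtensions \
  target/scala-2.12/myapp_2.12-1.0.jar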