Skip to content

Commit

Permalink
[TOREE-537] Upgrade Spark 3.3.2 (#201)
Browse files Browse the repository at this point in the history
  • Loading branch information
pan3793 authored Aug 9, 2023
1 parent e107378 commit 0629402
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 21 deletions.
13 changes: 7 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ RUN curl -sL https://deb.nodesource.com/setup_0.12 | bash - && \
npm install -g bower

# for Apache Spark demos
ENV APACHE_SPARK_VERSION 3.0.3
ENV APACHE_SPARK_VERSION 3.3.2
ENV APACHE_SPARK_CUSTOM_NAME=hadoop3

RUN apt-get -y update && \
apt-get -y install software-properties-common
Expand All @@ -46,11 +47,11 @@ RUN echo "===> install Java" && \
update-java-alternatives -s java-8-oracle

RUN cd /tmp && \
wget -q http://apache.claz.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz && \
tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz -C /usr/local && \
rm spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz
wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
tar xzf spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz -C /usr/local && \
rm spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz

RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7 spark
RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} spark

# R support
RUN apt-get update && \
Expand All @@ -61,7 +62,7 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*

ENV SPARK_HOME /usr/local/spark
ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip
ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.5-src.zip
ENV PYSPARK_PYTHON /home/main/anaconda2/envs/python3/bin/python
ENV R_LIBS_USER $SPARK_HOME/R/lib

Expand Down
11 changes: 6 additions & 5 deletions Dockerfile.toree-dev
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ FROM jupyter/all-spark-notebook
USER root

# Spark dependencies
ENV APACHE_SPARK_VERSION 3.0.3
ENV APACHE_SPARK_VERSION 3.3.2
ENV APACHE_SPARK_CUSTOM_NAME=hadoop3

RUN apt-get -y update && \
apt-get install -y --no-install-recommends openjdk-8-jdk ca-certificates-java && \
Expand All @@ -35,14 +36,14 @@ RUN apt-get -y update && \

# Installing Spark3
RUN cd /tmp && \
wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz && \
tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz -C /usr/local && \
rm spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7.tgz
wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
tar xzf spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz -C /usr/local && \
rm spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz

# Overwrite symlink
RUN cd /usr/local && \
rm spark && \
ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop2.7 spark
ln -s spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} spark

# Remove other scala kernels
RUN cd /opt/conda/share/jupyter/kernels/ && \
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ IS_SNAPSHOT?=true
SNAPSHOT:=-SNAPSHOT
endif

APACHE_SPARK_VERSION?=3.0.3
APACHE_SPARK_VERSION?=3.3.2
SCALA_VERSION?=2.12
IMAGE?=jupyter/all-spark-notebook:latest
EXAMPLE_IMAGE?=apache/toree-examples
Expand Down
8 changes: 4 additions & 4 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ ThisBuild / version := Properties.envOrElse("VERSION", "0.0.0-dev") +
(if ((ThisBuild / isSnapshot ).value) "-SNAPSHOT" else "")
ThisBuild / isSnapshot := Properties.envOrElse("IS_SNAPSHOT","true").toBoolean
ThisBuild / organization := "org.apache.toree.kernel"
ThisBuild / crossScalaVersions := Seq("2.12.15") // https://github.com/scala/bug/issues/12475, for Spark 3.2.0
ThisBuild / crossScalaVersions := Seq("2.12.15")
ThisBuild / scalaVersion := (ThisBuild / crossScalaVersions ).value.head
ThisBuild / Dependencies.sparkVersion := {
val envVar = "APACHE_SPARK_VERSION"
val defaultVersion = "3.0.0"
val defaultVersion = "3.3.2"

Properties.envOrNone(envVar) match {
case None =>
Expand Down Expand Up @@ -58,8 +58,8 @@ ThisBuild / javacOptions ++= Seq(
"-Xlint:-options",
"-Xlint:-processing",
"-Werror", // Treat warnings as errors
"-source", "1.6",
"-target", "1.6"
"-source", "1.8",
"-target", "1.8"
)
// Options provided to forked JVMs through sbt, based on our .jvmopts file
ThisBuild / javaOptions ++= Seq(
Expand Down
2 changes: 1 addition & 1 deletion etc/kernel.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
},
"display_name": "Apache Toree (development)",
"env": {
"PYTHONPATH": "/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.9-src.zip",
"PYTHONPATH": "/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.9.5-src.zip",
"SPARK_HOME": "/usr/local/spark",
"CAPTURE_STANDARD_ERR": "true",
"MAX_INTERPRETER_THREADS": "16",
Expand Down
8 changes: 4 additions & 4 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ object Dependencies {
val coursier = "io.get-coursier" %% "coursier" % coursierVersion // Apache v2
val coursierCache = "io.get-coursier" %% "coursier-cache" % coursierVersion // Apache v2

val ivy = "org.apache.ivy" % "ivy" % "2.4.0" // Apache v2
val ivy = "org.apache.ivy" % "ivy" % "2.5.1" // Apache v2

// use the same jackson version in test than the one provided at runtime by Spark 3.0.0
val jacksonDatabind = "com.fasterxml.jackson.core" % "jackson-databind" % "2.10.0" // Apache v2
// use the same jackson version in test as the one provided at runtime by Spark 3.3.2
val jacksonDatabind = "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.4.2" // Apache v2

val jeroMq = "org.zeromq" % "jeromq" % "0.5.3" // MPL v2

Expand All @@ -55,7 +55,7 @@ object Dependencies {

val scalaTest = "org.scalatest" %% "scalatest" % "3.0.8" // Apache v2

val slf4jApi = "org.slf4j" % "slf4j-api" % "1.7.30" // MIT
val slf4jApi = "org.slf4j" % "slf4j-api" % "1.7.32" // MIT

val sparkVersion = settingKey[String]("Version of Apache Spark to use in Toree") // defined in root build
val sparkCore = Def.setting{ "org.apache.spark" %% "spark-core" % sparkVersion.value } // Apache v2
Expand Down

0 comments on commit 0629402

Please sign in to comment.