Skip to content

Commit

Permalink
Spark versions matrix (#43)
Browse files Browse the repository at this point in the history
* Compile and test against multiple versions

* Do not mark spark as provided in examples

* Vary spark versions only in example projects

* Update actions
  • Loading branch information
vincenzobaz authored Oct 6, 2023
1 parent 48b2b50 commit bb88367
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 41 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ jobs:
publish:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2.3.4
# Release tags of actions/checkout are "v"-prefixed (run.yml uses @v4); the bare ref "4" is not one of them.
- uses: actions/checkout@v4
with:
# 0 = fetch the complete history instead of a shallow clone.
fetch-depth: 0
- uses: olafurpg/setup-scala@v10
- uses: olafurpg/setup-scala@v14
- name: Check version policy
run: sbt versionPolicyCheck
- run: sbt ci-release
Expand Down
16 changes: 5 additions & 11 deletions .github/workflows/run.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,12 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: Checkout spark-scala3
uses: actions/checkout@v2
- uses: olafurpg/setup-scala@v10
uses: actions/checkout@v4
- uses: olafurpg/setup-scala@v14
with:
java-version: [email protected]
- name: Run tests
run: sbt test
- name: Run WordCount
timeout-minutes: 5
run: sbt "examples / runMain rdd.wordcount"
- name: Run Dataset-based WordCount
timeout-minutes: 5
run: sbt "examples / runMain rdd.wordcountSql"
- name: Run StarWars
timeout-minutes: 5
run: sbt "examples / runMain sql.StarWars"
- name: Run all main classes for all spark versions
timeout-minutes: 15
run: sbt "runAllMains"
108 changes: 80 additions & 28 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
ThisBuild / scalaVersion := "3.3.1"
import sbt.internal.ProjectMatrix

val scalaVer = "3.3.1"
ThisBuild / scalaVersion := scalaVer
ThisBuild / semanticdbEnabled := true
ThisBuild / scalacOptions ++= List(
"-Wunused:imports"
)

val sparkVersion = "3.5.0"
val sparkSql = ("org.apache.spark" %% "spark-sql" % sparkVersion).cross(
CrossVersion.for3Use2_13
)
val munit = "org.scalameta" %% "munit" % "0.7.29"

val inputDirectory = Def.settingKey[File]("")

/** Builds the `spark-sql` dependency for the requested Spark version.
  *
  * The module is declared with `CrossVersion.for3Use2_13`, i.e. the
  * Scala 2.13 artifact is requested from this Scala 3 build.
  *
  * @param ver Spark version string, e.g. "3.5.0"
  */
def sparkSqlDep(ver: String) = {
  val sparkSqlModule = "org.apache.spark" %% "spark-sql" % ver
  sparkSqlModule.cross(CrossVersion.for3Use2_13)
}

// See https://github.com/apache/spark/blob/v3.3.2/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java
val unnamedJavaOptions = List(
"-XX:+IgnoreUnrecognizedVMOptions",
Expand All @@ -31,48 +33,98 @@ val unnamedJavaOptions = List(
"--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED"
)

lazy val root = project
.in(file("."))
.aggregate(encoders, udf, examples)
.settings(publish / skip := true)
.settings(publishSettings)
// Root project located at the repository root. It aggregates the udf and
// encoders modules plus every project generated from the examples matrix
// (examples.projectRefs). It receives the shared publishSettings, but
// publishing of the root itself is skipped.
lazy val root = (project in file("."))
.aggregate(udf)
.aggregate(encoders)
.aggregate(examples.projectRefs: _*)
.settings(
publishSettings,
publish / skip := true
)

lazy val encoders = project
.in(file("encoders"))
lazy val encoders = (project in file("encoders"))
.settings(
name := "spark-scala3-encoders",
libraryDependencies ++= Seq(sparkSql % Provided, munit % Test),
libraryDependencies ++= Seq(
sparkSqlDep(sparkVersions.head.sparkVersion),
munit % Test
),
Test / fork := true,
Test / javaOptions ++= unnamedJavaOptions
// Test / javaOptions += "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=1044"
)
.settings(publishSettings)

lazy val udf = project
lazy val udf = (project in file("udf"))
.in(file("udf"))
.settings(
name := "spark-scala3-udf",
libraryDependencies ++= Seq(sparkSql % Provided, munit % Test),
libraryDependencies ++= Seq(
sparkSqlDep(sparkVersions.head.sparkVersion),
munit % Test
),
Test / fork := true,
Test / javaOptions ++= unnamedJavaOptions
)
.settings(publishSettings)
.dependsOn(encoders)

lazy val examples = project
.in(file("examples"))
.enablePlugins(BuildInfoPlugin)
.dependsOn(encoders, udf)
.settings(
publish / skip := true,
inputDirectory.withRank(
KeyRanks.Invisible
) := baseDirectory.value / "input",
buildInfoKeys := Seq[BuildInfoKey](inputDirectory),
libraryDependencies ++= Seq(sparkSql),
run / fork := true,
run / javaOptions ++= unnamedJavaOptions
// Example programs as a project matrix rooted at examples/; the rows are
// added by sparkVersionMatrix.
lazy val examples =
sparkVersionMatrix(
projectMatrix in file("examples")
)
.enablePlugins(BuildInfoPlugin)
.settings(
// The examples are never published.
publish / skip := true,
// The input directory is resolved from the build root (ThisBuild / baseDirectory),
// not from the base directory of the individual matrix project.
inputDirectory.withRank(
KeyRanks.Invisible
) := (ThisBuild / baseDirectory).value / "examples" / "input",
// inputDirectory is the only key handed to sbt-buildinfo.
buildInfoKeys := Seq[BuildInfoKey](inputDirectory),
// `run` forks a JVM and passes it the unnamedJavaOptions flags.
run / fork := true,
run / javaOptions ++= unnamedJavaOptions
)

// `latestExample` expands to the id of the examples project selected by the
// first entry of sparkVersions on the JVM axis, for the build's Scala version.
addCommandAlias(
  "latestExample",
  examples.finder(sparkVersions.head, VirtualAxis.jvm)(scalaVer).id
)

// Spark versions to check. Always most recent first: the head of this list is
// the version used wherever the build refers to `sparkVersions.head`.
lazy val sparkVersions = List(
  SparkVersionAxis(
    idSuffix = "_spark35_",
    directorySuffix = "spark350",
    sparkVersion = "3.5.0"
  ),
  SparkVersionAxis(
    idSuffix = "_spark34_",
    directorySuffix = "spark341",
    sparkVersion = "3.4.1"
  ),
  SparkVersionAxis(
    idSuffix = "_spark33_",
    directorySuffix = "spark333",
    sparkVersion = "3.3.3"
  )
)

// Task that runs, one after another, every discovered main class of every
// project in the `examples` matrix.
lazy val runAllMains = taskKey[Unit]("Run all mains")
runAllMains := Def.sequential {
// allProjects() yields pairs; only the first element (the project) is used.
examples.allProjects().map(_._1).map { project =>
// Dynamic task: the tasks to run are built from the value of
// discoveredMainClasses, which is only available once that task has run.
Def.taskDyn {
val mainClasses = (project / Compile / discoveredMainClasses).value
Def.sequential {
mainClasses.map { mainClass =>
// toTask receives the raw argument string for runMain; the leading space
// presumably separates the task name from its argument (TODO confirm).
(project / Compile / runMain).toTask(" " + mainClass)
}
}
}
}
}.value

/** Adds one custom row per entry of `sparkVersions` to the given matrix.
  *
  * Every row is built for `scalaVer` on the JVM axis. The generated project
  * gets a module name suffixed with the axis' `idSuffix`, is excluded from
  * publishing, depends on `spark-sql` at the axis' Spark version, and
  * depends on the `encoders` and `udf` projects.
  *
  * @param projectRoot the matrix to extend
  * @return the matrix with one additional row per Spark version
  */
def sparkVersionMatrix(
    projectRoot: ProjectMatrix
): ProjectMatrix =
  sparkVersions.foldLeft(projectRoot) { (matrix, sparkAxis) =>
    matrix.customRow(
      scalaVersions = Seq(scalaVer),
      axisValues = Seq(sparkAxis, VirtualAxis.jvm),
      row =>
        row
          .settings(
            moduleName := name.value + sparkAxis.idSuffix,
            publish / skip := true,
            libraryDependencies += sparkSqlDep(sparkAxis.sparkVersion)
          )
          .dependsOn(encoders, udf)
    )
  }

import xerial.sbt.Sonatype._
lazy val publishSettings = Def.settings(
Expand Down
7 changes: 7 additions & 0 deletions project/SparkVersionAxis.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import sbt._

/** Project-matrix axis describing the Spark version a row is built against.
  *
  * @param idSuffix        suffix contributed by this axis to identifiers; the
  *                        build also appends it to the module name of each row
  * @param directorySuffix directory-name suffix required by `VirtualAxis`
  *                        (presumably used by sbt-projectmatrix for per-row
  *                        output directories; confirm against the plugin docs)
  * @param sparkVersion    Spark version used to resolve the `spark-sql`
  *                        dependency, e.g. "3.5.0"
  */
case class SparkVersionAxis(
    idSuffix: String,
    directorySuffix: String,
    sparkVersion: String
) extends VirtualAxis.WeakAxis
1 change: 1 addition & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.11.0")
addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.12")
addSbtPlugin("ch.epfl.scala" % "sbt-version-policy" % "2.1.3")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.0")
addSbtPlugin("com.eed3si9n" % "sbt-projectmatrix" % "0.9.1")

0 comments on commit bb88367

Please sign in to comment.