Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/branch-24.10' into 240513_optimi…
Browse files Browse the repository at this point in the history
…ze_expand

Signed-off-by: Hongbin Ma (Mahone) <[email protected]>
  • Loading branch information
binmahone committed Sep 6, 2024
2 parents 55eb7bd + 7de3fc4 commit 6f39db6
Show file tree
Hide file tree
Showing 621 changed files with 16,461 additions and 13,011 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/auto-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ name: auto-merge HEAD to BASE
on:
pull_request_target:
branches:
- branch-24.06
- branch-24.08
types: [closed]

jobs:
Expand All @@ -29,13 +29,13 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
ref: branch-24.06 # force to fetch from latest upstream instead of PR ref
ref: branch-24.08 # force to fetch from latest upstream instead of PR ref

- name: auto-merge job
uses: ./.github/workflows/auto-merge
env:
OWNER: NVIDIA
REPO_NAME: spark-rapids
HEAD: branch-24.06
BASE: branch-24.08
HEAD: branch-24.08
BASE: branch-24.10
AUTOMERGE_TOKEN: ${{ secrets.AUTOMERGE_TOKEN }} # use to merge PR
5 changes: 4 additions & 1 deletion .github/workflows/blossom-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ jobs:
github.actor == 'binmahone' ||
github.actor == 'zpuller' ||
github.actor == 'pxLi' ||
github.actor == 'Feng-Jiang28'
github.actor == 'Feng-Jiang28' ||
github.actor == 'SurajAralihalli' ||
github.actor == 'jihoonson' ||
github.actor == 'ustcfy'
)
steps:
- name: Check if comment is issued by authorized person
Expand Down
176 changes: 124 additions & 52 deletions .github/workflows/mvn-verify-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,9 @@ jobs:
runs-on: ubuntu-latest
outputs:
dailyCacheKey: ${{ steps.generateCacheKey.outputs.dailyCacheKey }}
defaultSparkVersion: ${{ steps.allShimVersionsStep.outputs.defaultSparkVersion }}
sparkTailVersions: ${{ steps.allShimVersionsStep.outputs.tailVersions }}
sparkJDKVersions: ${{ steps.allShimVersionsStep.outputs.jdkVersions }}
scala213Versions: ${{ steps.allShimVersionsStep.outputs.scala213Versions }}
defaultSparkVersion: ${{ steps.all212ShimVersionsStep.outputs.defaultSparkVersion }}
sparkTailVersions: ${{ steps.all212ShimVersionsStep.outputs.tailVersions }}
sparkJDKVersions: ${{ steps.all212ShimVersionsStep.outputs.jdkVersions }}
steps:
- uses: actions/checkout@v4 # refs/pull/:prNumber/merge
- uses: actions/setup-java@v4
Expand All @@ -65,31 +64,12 @@ jobs:
restore-keys: ${{ runner.os }}-maven-
- name: populate-daily-cache
if: steps.cache.outputs.cache-hit != 'true'
env:
SCALA_VER: '2.12'
run: |
set -x
max_retry=3; delay=30; i=1
while true; do
for pom in pom.xml scala2.13/pom.xml
do
mvn ${{ env.COMMON_MVN_FLAGS }} --file $pom help:evaluate -pl dist \
-Dexpression=included_buildvers \
-DforceStdout -PnoSnapshots -q | tr -d ',' | \
xargs -n 1 bash -c \
'mvn ${{ env.COMMON_MVN_FLAGS }} --file $1 -Dbuildver=$2 de.qaware.maven:go-offline-maven-plugin:resolve-dependencies' _ $pom
# compile base versions to cache scala compiler and compiler bridge
mvn ${{ env.COMMON_MVN_FLAGS }} --file $pom \
process-test-resources -pl sql-plugin-api -am
done && break || {
if [[ $i -le $max_retry ]]; then
echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
else
echo "mvn command failed. Exit 1"; exit 1
fi
}
done
. .github/workflows/mvn-verify-check/populate-daily-cache.sh
- name: all shim versions
id: allShimVersionsStep
id: all212ShimVersionsStep
run: |
set -x
. jenkins/version-def.sh
Expand All @@ -113,30 +93,12 @@ jobs:
jdkHeadVersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":8}" "${SPARK_BASE_SHIM_VERSION}")
# jdk11
jdk11VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":11}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
# jdk17
jdk17VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
# jdk
jdkVersionArrBody=$jdkHeadVersionArrBody$jdk11VersionArrBody$jdk17VersionArrBody
jdkVersionArrBody=$jdkHeadVersionArrBody$jdk11VersionArrBody
jdkVersionArrBody=${jdkVersionArrBody:1}
jdkVersionJsonStr=$(printf {\"include\":[%s]} $jdkVersionArrBody)
echo "jdkVersions=$jdkVersionJsonStr" >> $GITHUB_OUTPUT
SCALA_BINARY_VER=2.13
. jenkins/version-def.sh
svArrBodyNoSnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":false}" "${SPARK_SHIM_VERSIONS_NOSNAPSHOTS[@]}")
svArrBodyNoSnapshot=${svArrBodyNoSnapshot:1}
# get private artifact version
privateVer=$(mvn help:evaluate -q -pl dist -Dexpression=spark-rapids-private.version -DforceStdout)
# do not add empty snapshot versions or when private version is released one (does not include snapshot shims)
if [[ ${#SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]} -gt 0 && $privateVer == *"-SNAPSHOT" ]]; then
svArrBodySnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":true}" "${SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]}")
svArrBodySnapshot=${svArrBodySnapshot:1}
svJsonStr=$(printf {\"include\":[%s]} $svArrBodyNoSnapshot,$svArrBodySnapshot)
else
svJsonStr=$(printf {\"include\":[%s]} $svArrBodyNoSnapshot)
fi
echo "scala213Versions=$svJsonStr" >> $GITHUB_OUTPUT
package-tests:
needs: cache-dependencies
Expand Down Expand Up @@ -187,27 +149,82 @@ jobs:
}
done
cache-dependencies-scala213:
  # Populates the daily ~/.m2 cache for the Scala 2.13 build and exports the
  # cache key plus the two build matrices read by the jobs that declare
  # `needs: cache-dependencies-scala213`.
  runs-on: ubuntu-latest
  outputs:
    scala213dailyCacheKey: ${{ steps.generateCacheKey.outputs.scala213dailyCacheKey }}
    scala213Versions: ${{ steps.all213ShimVersionsStep.outputs.scala213Versions }}
    sparkJDK17Versions: ${{ steps.all213ShimVersionsStep.outputs.jdkVersions }}
  steps:
    - uses: actions/checkout@v4 # refs/pull/:prNumber/merge
    - uses: actions/setup-java@v4
      with:
        distribution: 'temurin'
        java-version: 17
    - name: Generate daily cache key
      id: generateCacheKey
      run: |
        set -x
        # key = OS + pom hashes + PR base branch + current date, so the cache rolls over daily
        cacheKey="${{ runner.os }}-maven-scala213-${{ hashFiles('**/pom.xml') }}-${{ github.event.pull_request.base.ref }}-$(date +'%Y-%m-%d')"
        # append (-a) rather than truncate: the key is needed both as an env var
        # (read by the cache step below) and as a step output (read by the job outputs)
        echo "scala213dailyCacheKey=$cacheKey" | tee -a "$GITHUB_ENV" "$GITHUB_OUTPUT"
    - name: Cache local Maven repository
      id: cache
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ env.scala213dailyCacheKey }}
        restore-keys: ${{ runner.os }}-maven-
    - name: populate-daily-cache
      # only do the expensive dependency resolution when today's cache is missing
      if: steps.cache.outputs.cache-hit != 'true'
      env:
        SCALA_VER: '2.13'
      run: |
        . .github/workflows/mvn-verify-check/populate-daily-cache.sh
    - name: all 213 shim versions
      id: all213ShimVersionsStep
      run: |
        set -x
        # SCALA_BINARY_VER is set before sourcing version-def.sh, which provides the
        # SPARK_SHIM_VERSIONS_* arrays used below
        SCALA_BINARY_VER=2.13
        . jenkins/version-def.sh
        # one matrix entry per non-snapshot shim; printf repeats the format per array element
        svArrBodyNoSnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":false}" "${SPARK_SHIM_VERSIONS_NOSNAPSHOTS[@]}")
        # drop the leading comma produced by the repeated format
        svArrBodyNoSnapshot=${svArrBodyNoSnapshot:1}
        # format and argument are quoted so "[%s]" is never treated as a glob
        # and the body is passed as a single argument
        svJsonStr=$(printf '{"include":[%s]}' "$svArrBodyNoSnapshot")
        echo "scala213Versions=$svJsonStr" >> "$GITHUB_OUTPUT"
        # jdk17
        jdk17VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":17}" "${SPARK_SHIM_VERSIONS_JDK17_SCALA213[@]}")
        jdkVersionArrBody=${jdk17VersionArrBody:1}
        jdkVersionJsonStr=$(printf '{"include":[%s]}' "$jdkVersionArrBody")
        echo "jdkVersions=$jdkVersionJsonStr" >> "$GITHUB_OUTPUT"
package-tests-scala213:
needs: cache-dependencies
needs: cache-dependencies-scala213
continue-on-error: ${{ matrix.isSnapshot }}
strategy:
matrix: ${{ fromJSON(needs.cache-dependencies.outputs.scala213Versions) }}
matrix: ${{ fromJSON(needs.cache-dependencies-scala213.outputs.scala213Versions) }}
fail-fast: false
runs-on: ubuntu-latest
steps:

- uses: actions/checkout@v4 # refs/pull/:prNumber/merge

- name: Setup Java and Maven Env
uses: actions/setup-java@v4
with:
distribution: adopt
java-version: 8
java-version: 17

- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ needs.cache-dependencies.outputs.dailyCacheKey }}
key: ${{ needs.cache-dependencies-scala213.outputs.scala213dailyCacheKey }}

- name: check runtime before tests
run: |
Expand All @@ -218,7 +235,7 @@ jobs:
run: |
# https://github.com/NVIDIA/spark-rapids/issues/8847
# specify expected versions
export JAVA_HOME=${JAVA_HOME_8_X64}
export JAVA_HOME=${JAVA_HOME_17_X64}
export PATH=${JAVA_HOME}/bin:${PATH}
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
# verify Scala 2.13 build files
Expand Down Expand Up @@ -246,8 +263,63 @@ jobs:
}
done
# Runs `mvn verify` for the Scala 2.13 build once per matrix entry
# (spark-version / java-version pairs published by cache-dependencies-scala213).
verify-213-modules:
needs: cache-dependencies-scala213
runs-on: ubuntu-latest
strategy:
# matrix JSON is the `sparkJDK17Versions` output of the cache job
matrix: ${{ fromJSON(needs.cache-dependencies-scala213.outputs.sparkJDK17Versions) }}
steps:
- uses: actions/checkout@v4 # refs/pull/:prNumber/merge

- name: Setup Java and Maven Env
uses: actions/setup-java@v4
with:
distribution: adopt
java-version: 17

# reuse the ~/.m2 cache under the daily key exported by the cache job
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ needs.cache-dependencies-scala213.outputs.scala213dailyCacheKey }}

# diagnostic only: prints the JAVA* environment and tool versions
- name: check runtime before tests
run: |
env | grep JAVA
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
- name: Build JDK
run: |
# https://github.com/NVIDIA/spark-rapids/issues/8847
# specify expected versions
# the workflow expression is substituted first, yielding e.g. JAVA_HOME_17_X64
export JAVA_HOME=${JAVA_HOME_${{ matrix.java-version }}_X64}
export PATH=${JAVA_HOME}/bin:${PATH}
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
# verify Scala 2.13 build files
./build/make-scala-version-build-files.sh 2.13
# verify git status
# any change under scala2.13 after regeneration means the committed files are stale
if [ -n "$(echo -n $(git status -s | grep 'scala2.13'))" ]; then
git add -N scala2.13/* && git diff 'scala2.13/*'
echo "Generated Scala 2.13 build files don't match what's in repository"
exit 1
fi
# change to Scala 2.13 Directory
cd scala2.13
# test command, will retry for 3 times if failed.
# the delay starts at 30s and doubles after every failed attempt
max_retry=3; delay=30; i=1
while true; do
mvn verify \
-P "individual,pre-merge,source-javadoc" -Dbuildver=${{ matrix.spark-version }} \
${{ env.COMMON_MVN_FLAGS }} && break || {
if [[ $i -le $max_retry ]]; then
echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
else
echo "mvn command failed. Exit 1"; exit 1
fi
}
done
verify-all-modules:
verify-all-212-modules:
needs: cache-dependencies
runs-on: ubuntu-latest
strategy:
Expand Down
40 changes: 40 additions & 0 deletions .github/workflows/mvn-verify-check/populate-daily-cache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash

# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Populates the local Maven repository for one Scala binary version.
#
# Inputs (environment):
#   SCALA_VER         - '2.12' or '2.13'; selects which root POM is used.
#   COMMON_MVN_FLAGS  - extra flags passed to every mvn invocation.
#
# The script is sourced by the workflow step, so `exit` ends the calling step.
# The whole populate sequence is retried (with a doubling delay) when any part
# of it fails.

set -x
max_retry=3; delay=30; i=1

# Select the root POM for the requested Scala binary version; fail fast on
# anything else instead of running mvn with an empty --file argument.
case "$SCALA_VER" in
    '2.12') pom='pom.xml' ;;
    '2.13') pom='scala2.13/pom.xml' ;;
    *)
        echo "Unsupported SCALA_VER '$SCALA_VER', expected '2.12' or '2.13'. Exit 1"
        exit 1
        ;;
esac

while true; do
    {
        # Resolve dependencies once per buildver.
        # - pipefail is scoped to the subshell so a failure of get_buildvers.py
        #   is not hidden by the exit status of the later pipeline stages.
        # - `xargs -n 1` splits the list on blanks and newlines, one buildver per
        #   mvn run; the previous `-n 1 -I {}` mixed mutually exclusive options
        #   and -I consumes whole input lines.
        # - -r skips mvn entirely when the list is empty.
        # - $COMMON_MVN_FLAGS and $pom are expanded here; \$1 is the buildver
        #   supplied by xargs to the child shell.
        (
            set -o pipefail
            python build/get_buildvers.py "no_snapshots.buildvers" "$pom" | tr -d ',' | \
                xargs -r -n 1 bash -c \
                    "mvn $COMMON_MVN_FLAGS --file $pom -Dbuildver=\$1 de.qaware.maven:go-offline-maven-plugin:resolve-dependencies" _
        ) &&
        # The `&&` above matters: a brace group returns the status of its last
        # command only, so without it a failed resolution would never be retried.
        # compile base versions to cache scala compiler and compiler bridge
        # ($COMMON_MVN_FLAGS is intentionally unquoted so it splits into separate flags)
        mvn $COMMON_MVN_FLAGS --file "$pom" \
            process-test-resources -pl sql-plugin-api -am
    } && break || {
        if [[ $i -le $max_retry ]]; then
            echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
        else
            echo "mvn command failed. Exit 1"; exit 1
        fi
    }
done
16 changes: 7 additions & 9 deletions .github/workflows/signoff-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,10 @@ jobs:
signoff-check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: sigoff-check job
uses: ./.github/workflows/signoff-check
env:
OWNER: NVIDIA
REPO_NAME: spark-rapids
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PULL_NUMBER: ${{ github.event.number }}
- name: signoff
uses: NVIDIA/spark-rapids-common/signoff-check@main
with:
owner: ${{ github.repository_owner }}
repo: spark-rapids
pull_number: ${{ github.event.number }}
token: ${{ secrets.GITHUB_TOKEN }}
22 changes: 0 additions & 22 deletions .github/workflows/signoff-check/Dockerfile

This file was deleted.

19 changes: 0 additions & 19 deletions .github/workflows/signoff-check/action.yml

This file was deleted.

Loading

0 comments on commit 6f39db6

Please sign in to comment.