diff --git a/build/build-in-docker b/build/build-in-docker
index 421cc1a855..49032185ba 100755
--- a/build/build-in-docker
+++ b/build/build-in-docker
@@ -1,7 +1,7 @@
 #!/bin/bash

 #
-# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,30 +24,27 @@ set -e
 SCRIPTDIR=$(cd $(dirname $0); pwd)

 LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"}
-CUDF_USE_PER_THREAD_DEFAULT_STREAM=${CUDF_USE_PER_THREAD_DEFAULT_STREAM:-ON}
 USE_GDS=${USE_GDS:-ON}
 export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"}
+# Make CUDA_VERSION consistent with the file run-in-docker
+export CUDA_VERSION=${CUDA_VERSION:-11.8.0}
+CUDA_CLASSIFIER=cuda${CUDA_VERSION%%.*}
+BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON}

 if (( $# == 0 )); then
     echo "Usage: $0 "
     exit 1
 fi

-_CUDF_CLEAN_SKIP=""
-# if ccache is enabled and libcudf.clean.skip not provided
-# by the user remove the cpp build directory
-#
-if [[ "$CCACHE_DISABLE" != "1" ]]; then
-    if [[ ! "$*" =~ " -Dlibcudf.clean.skip=" ]]; then
-        # Don't skip clean if ccache is enabled
-        # unless the user overrides
-        _CUDF_CLEAN_SKIP="-Dlibcudf.clean.skip=false"
-    fi
+# Set env for arm64 build, The possible values of 'uname -m' : [x86_64/i386/aarch64/mips/...]
+if [ "$(uname -m)" == "aarch64" ]; then
+    USE_GDS="OFF" # The GDS cuFiles RDMA libraries are not included in the arm64 CUDA toolkit.
+    BUILD_FAULTINJ="OFF" # libcupti_static.a linked by cufaultinj, does not exist in the arm64 CUDA toolkit.
 fi

 $SCRIPTDIR/run-in-docker mvn \
     -Dmaven.repo.local=$LOCAL_MAVEN_REPO \
-    -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$CUDF_USE_PER_THREAD_DEFAULT_STREAM \
     -DUSE_GDS=$USE_GDS \
-    $_CUDF_CLEAN_SKIP \
+    -DBUILD_FAULTINJ=${BUILD_FAULTINJ} \
+    -Dcuda.version=$CUDA_CLASSIFIER \
     "$@"
diff --git a/build/run-in-docker b/build/run-in-docker
index 62d40aac48..81152a1d9d 100755
--- a/build/run-in-docker
+++ b/build/run-in-docker
@@ -1,7 +1,7 @@
 #!/bin/bash

 #
-# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -27,11 +27,16 @@ REPODIR=$SCRIPTDIR/..
 CUDA_VERSION=${CUDA_VERSION:-11.8.0}
 DOCKER_CMD=${DOCKER_CMD:-docker}
 DOCKER_BUILD_EXTRA_ARGS=${DOCKER_BUILD_EXTRA_ARGS:-""}
+if [ "$(uname -m)" == "aarch64" ]; then
+    DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/arm64 --build-arg CMAKE_ARCH=aarch64 $DOCKER_BUILD_EXTRA_ARGS"
+else
+    DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/amd64 --build-arg CMAKE_ARCH=x86_64 $DOCKER_BUILD_EXTRA_ARGS"
+fi
 DOCKER_RUN_EXTRA_ARGS=${DOCKER_RUN_EXTRA_ARGS:-""}
 LOCAL_CCACHE_DIR=${LOCAL_CCACHE_DIR:-"$HOME/.ccache"}
 LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"}
-SPARK_IMAGE_NAME="spark-rapids-jni-build:${CUDA_VERSION}-devel-centos7"
+SPARK_IMAGE_NAME="spark-rapids-jni-build:${CUDA_VERSION}-devel-rockylinux8"

 # ensure directories exist
 mkdir -p "$LOCAL_CCACHE_DIR" "$LOCAL_MAVEN_REPO"

@@ -74,4 +79,4 @@ $DOCKER_CMD run $DOCKER_GPU_OPTS $DOCKER_RUN_EXTRA_ARGS -u $(id -u):$(id -g) --r
     -e VERBOSE \
     $DOCKER_OPTS \
     $SPARK_IMAGE_NAME \
-    scl enable devtoolset-11 "$RUN_CMD"
+    scl enable gcc-toolset-11 "$RUN_CMD"
diff --git a/ci/Dockerfile b/ci/Dockerfile
old mode 100755
new mode 100644
index e3b703a11e..f36ede2233
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -17,31 +17,29 @@
 ###
 # Build the image for spark-rapids-jni development environment.
 #
-# Arguments: CUDA_VERSION=11.8.0
+# Arguments: CUDA_VERSION=[11.X.Y, 12.X.Y], OS_RELEASE=[8, 9], TARGETPLATFORM=[linux/amd64, linux/arm64]
 #
 ###
 ARG CUDA_VERSION=11.8.0
-FROM nvidia/cuda:$CUDA_VERSION-devel-centos7
-ARG DEVTOOLSET_VERSION=11
+ARG OS_RELEASE=8
+ARG TARGETPLATFORM=linux/amd64
+# multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host
+# check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH)
+FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE
+ARG TOOLSET_VERSION=11

 ### Install basic requirements
-RUN yum install -y centos-release-scl
-RUN yum install -y devtoolset-${DEVTOOLSET_VERSION} rh-python38 epel-release
-RUN yum install -y zlib-devel maven tar wget patch ninja-build
-# require git 2.18+ to keep consistent submodule operations
-RUN yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm && yum install -y git
-# pin urllib3<2.0 for https://github.com/psf/requests/issues/6432
-RUN scl enable rh-python38 "pip install requests 'urllib3<2.0'"
-
+RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} python39 zlib-devel maven tar wget patch ninja-build git
 ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins
-RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids
+RUN mkdir -m 777 /usr/local/rapids /rapids

 # 3.22.3: CUDA architecture 'native' support + flexible CMAKE_<LANG>_*_LAUNCHER for ccache
 ARG CMAKE_VERSION=3.26.4
-
-RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \
-    tar zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \
-    rm cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz
-ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-x86_64/bin:$PATH
+# default x86_64 from x86 build, aarch64 cmake for arm build
+ARG CMAKE_ARCH=x86_64
+RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \
+    tar zxf cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \
+    rm cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz
+ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}/bin:$PATH
 # ccache for interactive builds
 ARG CCACHE_VERSION=4.6
@@ -51,7 +49,7 @@ RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v
     cd ccache-${CCACHE_VERSION} && \
     mkdir build && \
     cd build && \
-    scl enable devtoolset-${DEVTOOLSET_VERSION} \
+    scl enable gcc-toolset-${TOOLSET_VERSION} \
       "cmake .. \
       -DCMAKE_BUILD_TYPE=Release \
       -DZSTD_FROM_INTERNET=ON \
diff --git a/ci/Dockerfile.multi b/ci/Dockerfile.multi
deleted file mode 100644
index d3b198530b..0000000000
--- a/ci/Dockerfile.multi
+++ /dev/null
@@ -1,76 +0,0 @@
-#
-# Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-###
-# JNI CI image for multi-platform build
-#
-# Arguments: CUDA_VERSION=11.8.0
-#
-###
-ARG CUDA_VERSION=11.8.0
-ARG OS_RELEASE=8
-# multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host
-# check available offcial arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH)
-FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE
-ARG TOOLSET_VERSION=11
-### Install basic requirements
-RUN dnf install -y scl-utils
-RUN dnf install -y gcc-toolset-${TOOLSET_VERSION} python39
-RUN dnf --enablerepo=powertools install -y zlib-devel maven tar wget patch ninja-build
-# require git 2.18+ to keep consistent submodule operations
-RUN dnf install -y git
-## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins
-RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids
-
-# 3.22.3+: CUDA architecture 'native' support + flexible CMAKE_<LANG>_*_LAUNCHER for ccache
-ARG CMAKE_VERSION=3.26.4
-# default as arm64 release
-ARG CMAKE_ARCH=aarch64
-# aarch64 cmake for arm build
-RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \
-    tar zxf cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \
-    rm cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz
-ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}/bin:$PATH
-
-# ccache for interactive builds
-ARG CCACHE_VERSION=4.6
-RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}.tar.gz && \
-    tar zxf ccache-${CCACHE_VERSION}.tar.gz && \
-    rm ccache-${CCACHE_VERSION}.tar.gz && \
-    cd ccache-${CCACHE_VERSION} && \
-    mkdir build && \
-    cd build && \
-    scl enable gcc-toolset-${TOOLSET_VERSION} \
-      "cmake .. \
-      -DCMAKE_BUILD_TYPE=Release \
-      -DZSTD_FROM_INTERNET=ON \
-      -DREDIS_STORAGE_BACKEND=OFF && \
-      cmake --build . --parallel 4 --target install" && \
-    cd ../.. && \
-    rm -rf ccache-${CCACHE_VERSION}
-
-## install a version of boost that is needed for arrow/parquet to work
-RUN cd /usr/local && wget --quiet https://archives.boost.io/release/1.79.0/source/boost_1_79_0.tar.gz && \
-    tar -xzf boost_1_79_0.tar.gz && \
-    rm boost_1_79_0.tar.gz && \
-    cd boost_1_79_0 && \
-    ./bootstrap.sh --prefix=/usr/local && \
-    ./b2 install --prefix=/usr/local --with-filesystem --with-system && \
-    cd /usr/local && \
-    rm -rf boost_1_79_0
-
-# disable cuda container constraints to allow running w/ elder drivers on data-center GPUs
-ENV NVIDIA_DISABLE_REQUIRE="true"
diff --git a/ci/Jenkinsfile.premerge b/ci/Jenkinsfile.premerge
index a59db1af9a..0a00eb6f1b 100644
--- a/ci/Jenkinsfile.premerge
+++ b/ci/Jenkinsfile.premerge
@@ -30,7 +30,7 @@ import ipp.blossom.*
 def githubHelper // blossom github helper

 def TEMP_IMAGE_BUILD = true
-def IMAGE_PREMERGE = "${common.ARTIFACTORY_NAME}/sw-spark-docker/plugin-jni:centos7-cuda11.8.0-blossom"
+def IMAGE_PREMERGE = "${common.ARTIFACTORY_NAME}/sw-spark-docker/plugin-jni:rockylinux8-cuda11.8.0-blossom"
 def cpuImage = pod.getCPUYAML(IMAGE_PREMERGE)
 def PREMERGE_DOCKERFILE = 'ci/Dockerfile'
 def PREMERGE_TAG
@@ -150,7 +150,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true"""
            }

            if (TEMP_IMAGE_BUILD) {
-                PREMERGE_TAG = "centos7-cuda11.8.0-blossom-dev-${BUILD_TAG}"
+                PREMERGE_TAG = "rockylinux8-cuda11.8.0-blossom-dev-${BUILD_TAG}"
                IMAGE_PREMERGE = "${ARTIFACTORY_NAME}/sw-spark-docker-local/plugin-jni:${PREMERGE_TAG}"
                docker.build(IMAGE_PREMERGE, "--network=host -f ${PREMERGE_DOCKERFILE} -t $IMAGE_PREMERGE .")
                uploadDocker(IMAGE_PREMERGE)
@@ -212,7 +212,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true"""
        container('gpu') {
            timeout(time: 3, unit: 'HOURS') { // step only timeout for test run
                common.resolveIncompatibleDriverIssue(this)
-                sh 'scl enable devtoolset-11 "ci/premerge-build.sh"'
+                sh 'scl enable gcc-toolset-11 "ci/premerge-build.sh"'
                sh 'bash ci/fuzz-test.sh'
            }
        }
diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh
index 18119dc45d..f591f73a23 100755
--- a/ci/submodule-sync.sh
+++ b/ci/submodule-sync.sh
@@ -18,7 +18,7 @@
 # NOTE:
 #   this script is for jenkins only, and should not be used for local development
 #   run with ci/Dockerfile in jenkins:
-#     scl enable devtoolset-11 rh-python38 "ci/submodule-sync.sh"
+#     scl enable gcc-toolset-11 rh-python38 "ci/submodule-sync.sh"

 set -ex
diff --git a/pom.xml b/pom.xml
index 745f8127d1..24daa4635e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -338,6 +338,11 @@
       <id>arm64</id>
+      <activation>
+        <os>
+          <arch>aarch64</arch>
+        </os>
+      </activation>
       <properties>
         <jni.classifier>${cuda.version}-arm64</jni.classifier>
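
Below is a short usage sketch, not part of the patch: the multi-arch image build follows the docker buildx comment added to ci/Dockerfile, while the Maven arguments passed to build/build-in-docker (install -DskipTests) are illustrative placeholders rather than values taken from this diff.

# Illustrative only. Build the CI image for both architectures; this assumes a
# configured buildx builder. buildx supplies TARGETPLATFORM per platform, and
# CUDA_VERSION/OS_RELEASE default to 11.8.0 and 8 as declared in ci/Dockerfile.
docker buildx build --platform linux/arm64,linux/amd64 \
    --build-arg CUDA_VERSION=11.8.0 \
    --build-arg OS_RELEASE=8 \
    -f ci/Dockerfile .

# Illustrative only. On an aarch64 host, build-in-docker derives
# -Dcuda.version=cuda11 from CUDA_VERSION=11.8.0 via ${CUDA_VERSION%%.*},
# and switches USE_GDS and BUILD_FAULTINJ off automatically.
CUDA_VERSION=11.8.0 ./build/build-in-docker install -DskipTests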