Skip to content

Commit

Permalink
re-introduce updated CSCS CI (#2402)
Browse files Browse the repository at this point in the history
unit tests and examples now run again on CSCS premises:
- eiger: 2x AMD EPYC 7742 (multicore)
- todi: 4x GH200 modules (gpu: arbor is compiled with cuda 12.4 and runs
on the H100)

The distributed tests are not enabled as of now (due to current problems
with the gitlab runners). Once this is fixed, I will add another PR to
enable them.

The tests run automatically for every PR to master, but only for
whitelisted users. Manual trigger: make a comment with the content
`cscs-ci run default`.

Other changes:
- use rpaths for pyarb: helps the linker to find pugixml
- mark some more external CMake variables as advanced (from units)
  • Loading branch information
boeschf authored Sep 13, 2024
1 parent e1de4be commit e6b78db
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 1 deletion.
122 changes: 122 additions & 0 deletions .gitlab/cscs-ci-default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Remote CSCS CI recipe; presumably the source of the `.container-*`,
# `.dynamic-image-name` and `.make-multiarch-image` templates extended by the
# jobs in this file (none of them is defined locally).
include:
  - remote: "https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml"

# Pipeline stages, listed in execution order.
stages: [build_base, build_app, build_multiarch, test]

# TARGET must be one of {daint-gpu daint-mc alps-zen2 alps-a100 alps-mi200 alps-gh200 alps-mi300a}
# eiger: 2x AMD EPYC 7742 64-Core, micro-arch: zen2
# todi: 4x gh200 72-Core + H100, micro-arch: neoverse-v2, cuda-arch: 90

# Base image (dependencies only) for x86_64, built from Dockerfile.base with
# the CPU-only spack base/helper images and TARGET=alps-zen2.
build_base_image_x86_64:
  stage: build_base
  extends:
    - .container-builder-cscs-zen2
    - .dynamic-image-name
  variables:
    DOCKERFILE: .gitlab/docker/Dockerfile.base
    # NOTE(review): semantics of WATCH_FILECHANGES / CSCS_* are defined by the
    # included CSCS templates, not in this file.
    WATCH_FILECHANGES: ".gitlab/docker/Dockerfile.base"
    PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/arbor/baseimg-x86_64
    CSCS_BUILD_IN_MEMORY: TRUE
    CSCS_REBUILD_POLICY: "if-not-exists"
    DOCKER_BUILD_ARGS: '["IMG_BASE=ghcr.io/eth-cscs/docker-ci-ext/base-containers/spack-base:spack0.21.0-ubuntu22.04-cpu", "IMG_HELPER=ghcr.io/eth-cscs/docker-ci-ext/base-containers/spack-helper:ubuntu22.04-cpu", "TARGET=alps-zen2"]'

# Base image (dependencies only) for aarch64, built from Dockerfile.base with
# the CUDA 12.4.1 spack base/helper images and TARGET=alps-gh200.
build_base_image_aarch64:
  stage: build_base
  extends:
    - .container-builder-cscs-gh200
    - .dynamic-image-name
  variables:
    DOCKERFILE: .gitlab/docker/Dockerfile.base
    # NOTE(review): semantics of WATCH_FILECHANGES / CSCS_* are defined by the
    # included CSCS templates, not in this file.
    WATCH_FILECHANGES: ".gitlab/docker/Dockerfile.base"
    PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/arbor/baseimg-aarch64
    CSCS_BUILD_IN_MEMORY: TRUE
    CSCS_REBUILD_POLICY: "if-not-exists"
    DOCKER_BUILD_ARGS: '["IMG_BASE=ghcr.io/eth-cscs/docker-ci-ext/base-containers/spack-base:spack0.21.0-ubuntu22.04-cuda12.4.1", "IMG_HELPER=ghcr.io/eth-cscs/docker-ci-ext/base-containers/spack-helper:ubuntu22.04-cuda12.4.1", "TARGET=alps-gh200"]'

# Application image for x86_64: builds Arbor via Dockerfile.app on top of the
# x86_64 base image, tagged with the short commit SHA.
build_app_image_x86_64:
  stage: build_app
  extends: .container-builder-cscs-zen2
  needs:
    # Artifacts of the base-image job are required; presumably they carry
    # $BASE_IMAGE (it is not set anywhere in this file) — TODO confirm.
    - job: build_base_image_x86_64
      artifacts: true
  variables:
    DOCKERFILE: .gitlab/docker/Dockerfile.app
    PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/arbor/appimg-x86_64:$CI_COMMIT_SHORT_SHA
    DOCKER_BUILD_ARGS: '["BASE_IMAGE=$BASE_IMAGE", "CXX_FLAGS=-march=znver2"]'

# Application image for aarch64: builds Arbor via Dockerfile.app on top of the
# aarch64 base image with the CUDA backend (GPU_ARCH=90), tagged with the
# short commit SHA.
build_app_image_aarch64:
  stage: build_app
  extends: .container-builder-cscs-gh200
  needs:
    # Artifacts of the base-image job are required; presumably they carry
    # $BASE_IMAGE (it is not set anywhere in this file) — TODO confirm.
    - job: build_base_image_aarch64
      artifacts: true
  variables:
    DOCKERFILE: .gitlab/docker/Dockerfile.app
    PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/arbor/appimg-aarch64:$CI_COMMIT_SHORT_SHA
    DOCKER_BUILD_ARGS: '["BASE_IMAGE=$BASE_IMAGE", "CXX_FLAGS=-mcpu=neoverse-v2 -mtune=neoverse-v2", "GPU=cuda", "GPU_ARCH=90"]'

# Combines the two per-architecture application images into a single image
# name (`appimg:<short sha>`), which the test jobs use as their `image`.
build_multiarch_image:
  stage: build_multiarch
  extends: .make-multiarch-image
  variables:
    # Inputs: the images produced by build_app_image_{x86_64,aarch64}.
    PERSIST_IMAGE_NAME_X86_64: "$CSCS_REGISTRY_PATH/arbor/appimg-x86_64:$CI_COMMIT_SHORT_SHA"
    PERSIST_IMAGE_NAME_AARCH64: "$CSCS_REGISTRY_PATH/arbor/appimg-aarch64:$CI_COMMIT_SHORT_SHA"
    # Output: the combined image name.
    PERSIST_IMAGE_NAME: "$CSCS_REGISTRY_PATH/arbor/appimg:$CI_COMMIT_SHORT_SHA"

# Hidden template shared by the per-architecture test jobs: single node,
# single task, no MPI. Runs inside the multi-arch application image.
.test_unit:
  stage: test
  image: $CSCS_REGISTRY_PATH/arbor/appimg:$CI_COMMIT_SHORT_SHA
  variables:
    SLURM_JOB_NUM_NODES: 1
    SLURM_NTASKS: 1
    SLURM_NTASKS_PER_NODE: 1
    SLURM_TIMELIMIT: "00:30:00"
    SLURM_CPU_BIND: "verbose,none"
    USE_MPI: "NO"
  script:
    # All paths below are relative to the source tree copied into the image.
    - cd /arbor.src
    # C++ unit test binaries from the build tree.
    - build/bin/unit-modcc
    - build/bin/unit-local
    - build/bin/unit
    - scripts/run_cpp_examples.sh
    # Python tests and examples run inside a venv created at the install
    # prefix, with system site-packages visible.
    - python -m venv --system-site-packages /arbor.install
    - source /arbor.install/bin/activate
    - python -m unittest discover -v -s python
    - scripts/run_python_examples.sh
    - scripts/test_executables.sh
    - deactivate

# Unit tests and examples on eiger (multicore partition).
test_x86_64:
  extends:
    - .container-runner-eiger-mc
    - .test_unit
  variables:
    SLURM_CONSTRAINT: mc

# Unit tests and examples on todi (gh200 nodes).
test_aarch64:
  extends:
    - .container-runner-todi-gh200
    - .test_unit

## distributed tests don't work yet - possible problem with the gitlab runners
#.test_distributed:
# stage: test
# image: $CSCS_REGISTRY_PATH/arbor/appimg:$CI_COMMIT_SHORT_SHA
# script:
# - cd /arbor.src
# - build/bin/unit-mpi
# - scripts/run_cpp_examples.sh -d
# variables:
# SLURM_JOB_NUM_NODES: 2
# SLURM_CPU_BIND: "verbose,rank_ldom"
# SLURM_TIMELIMIT: "00:30:00"
# USE_MPI: "YES"
#
#test_x86_64-distributed:
# extends: [.container-runner-eiger-mc, .test_distributed]
# variables:
# SLURM_CONSTRAINT: mc
# SLURM_NTASKS_PER_NODE: 8
#
#test_aarch64-distributed:
# extends: [.container-runner-todi-gh200, .test_distributed]
# variables:
# SLURM_GPUS_PER_NODE: 4
# SLURM_GPUS_PER_TASK: 1
# SLURM_NTASKS_PER_NODE: 4
32 changes: 32 additions & 0 deletions .gitlab/docker/Dockerfile.app
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Application image: configures, builds and installs Arbor (tests, examples
# and the Python module) on top of a prepared base image.
#
# Build arguments:
#   BASE_IMAGE  image to build on (must be supplied by the caller)
#   NUM_PROCS   number of parallel ninja jobs; if unset or empty, ninja's own
#               default parallelism is used
#   CXX_FLAGS   forwarded to ARB_CXX_FLAGS_TARGET (default: empty)
#   GPU         forwarded to ARB_GPU (default: none)
#   GPU_ARCH    forwarded to CMAKE_CUDA_ARCHITECTURES (default: 60)
ARG BASE_IMAGE
FROM $BASE_IMAGE

COPY . /arbor.src

ARG NUM_PROCS
ARG CXX_FLAGS=""
ARG GPU=none
ARG GPU_ARCH=60

# Log the target flags so they show up in the build output.
RUN echo ${CXX_FLAGS}

# Every continued line ends in " \" so that arguments can never be glued
# together, independent of the next line's leading whitespace.
# `${NUM_PROCS:+-j${NUM_PROCS}}` expands to `-jN` only when NUM_PROCS is set
# and non-empty; a bare `-j` would make ninja consume `tests` as its value.
RUN mkdir -p /arbor.src/build \
    && cd /arbor.src/build \
    && cmake .. \
       -GNinja \
       -DCMAKE_INSTALL_PREFIX=/arbor.install \
       -DCMAKE_BUILD_TYPE=Release \
       -DBUILD_TESTING=ON \
       -DARB_ARCH=none \
       -DARB_CXX_FLAGS_TARGET="${CXX_FLAGS}" \
       -DARB_WITH_ASSERTIONS=ON \
       -DARB_WITH_PROFILING=ON \
       -DARB_VECTORIZE=ON \
       -DARB_WITH_PYTHON=ON \
       -DARB_USE_HWLOC=ON \
       -DARB_WITH_MPI=ON \
       -DARB_GPU="${GPU}" \
       -DCMAKE_CUDA_ARCHITECTURES="${GPU_ARCH}" \
       -DARB_USE_GPU_RNG=ON \
    && ninja ${NUM_PROCS:+-j${NUM_PROCS}} tests examples pyarb \
    && ninja install
50 changes: 50 additions & 0 deletions .gitlab/docker/Dockerfile.base
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Base image: installs Arbor's build and test dependencies with spack in a
# builder stage, then copies the resulting installation into a helper image.
#
# Build arguments:
#   IMG_BASE    image used for the spack builder stage
#   IMG_HELPER  image used for the final stage
#   TARGET      value passed to `spack-install-helper --target`
#
# Both image arguments are declared before the first FROM: only ARGs declared
# there are guaranteed to be expanded in FROM lines of every stage.
ARG IMG_BASE
ARG IMG_HELPER
FROM $IMG_BASE AS builder

ARG TARGET
RUN spack-install-helper --target $TARGET \
    "git" \
    "meson" \
    "ninja" \
    "cmake" \
    "valgrind" \
    "python" \
    "hwloc" \
    "boost" \
    "fmt" \
    "random123" \
    "py-mpi4py" \
    "py-sphinx" \
    "py-svgwrite" \
    "nlohmann-json" \
    "py-pybind11" \
    "py-numpy" \
    "py-flake8" \
    "py-black" \
    "py-pytest" \
    "py-seaborn" \
    "py-pandas" \
    "pugixml" \
    "googletest"

# end of builder container, now we are ready to copy necessary files
# copy only relevant parts to the final container
FROM $IMG_HELPER

# it is important to keep the paths, otherwise your installation is broken
# all these paths are created with the above `spack-install-helper` invocation
COPY --from=builder /opt/spack-environment /opt/spack-environment
COPY --from=builder /opt/software /opt/software
COPY --from=builder /opt/._view /opt/._view
COPY --from=builder /etc/profile.d/z10_spack_environment.sh /etc/profile.d/z10_spack_environment.sh

# Some boilerplate to get all paths correctly - fix_spack_install is part of the base image
# and makes sure that all important things are being correctly setup
RUN fix_spack_install

# Finally install software that is needed, e.g. compilers
# It is also possible to build compilers via spack and let all dependencies be handled by spack
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get -yqq update && apt-get -yqq upgrade \
    && apt-get -yqq install build-essential gfortran \
    && rm -rf /var/lib/apt/lists/*
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS ON CACHE INTERNAL "" FORCE)
# Make CUDA support throw errors if architectures remain unclear
cmake_policy(SET CMP0104 NEW)

# NOTE(review): CMP0072 selects whether FindOpenGL prefers GLVND libraries by
# default; it does not control RPATH handling. If modern RPATH behavior was
# the goal, the related policies are CMP0042 / CMP0068 — confirm which policy
# was intended here.
cmake_policy(SET CMP0072 NEW)

# Set release as the default build type (CMake default is debug.)

if (NOT CMAKE_BUILD_TYPE)
Expand Down Expand Up @@ -392,7 +395,7 @@ mark_as_advanced(FORCE RANDOM123_INCLUDE_DIR)
# Mark cache entries that originate from external dependencies as advanced so
# they stay out of the default view of the CMake GUIs. One entry per line to
# keep future diffs small.
mark_as_advanced(FORCE
    pybind11_DIR
    PYBIND11_PYTHONLIBS_OVERWRITE
    PYBIND11_PYTHON_VERSION
    PYBIND11_FINDPYTHON
    PYBIND11_INSTALL
    PYBIND11_INTERNALS_VERSION
    PYBIND11_NOPYTHON
    PYBIND11_SIMPLE_GIL_MANAGEMENT
    PYBIND11_TEST)
mark_as_advanced(FORCE pugixml_DIR)
mark_as_advanced(FORCE fmt_DIR)
mark_as_advanced(FORCE
    units_DIR
    UNITS_BUILD_OBJECT_LIBRARY
    UNITS_BUILD_SHARED_LIBRARY
    UNITS_HEADER_ONLY
    UNITS_NAMESPACE
    UNITS_BUILD_FUZZ_TARGETS
    UNITS_ENABLE_TESTS)
mark_as_advanced(FORCE tinyopt_DIR)
mark_as_advanced(FORCE CXXFEATURECHECK_DEBUG)
mark_as_advanced(FORCE
    CPM_DONT_CREATE_PACKAGE_LOCK
    CPM_DONT_UPDATE_MODULE_PATH
    CPM_DOWNLOAD_ALL
    CPM_INCLUDE_ALL_IN_PACKAGE_LOCK
    CPM_LOCAL_PACKAGES_ONLY
    CPM_SOURCE_CACHE
    CPM_USE_NAMED_CACHE_DIRECTORIES)
Expand Down
10 changes: 10 additions & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,20 @@ set_target_properties(pyarb PROPERTIES OUTPUT_NAME _arbor)
# arbor.cpython-36m-x86_64-linux-gnu.so
set_target_properties(pyarb PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}")

# Install-time RPATH of the Python module: search for shared libraries
# relative to the location of the module itself.
if(NOT APPLE)
    # Non-Apple platforms (Linux): relative to the loading object's directory.
    set_property(TARGET pyarb PROPERTY INSTALL_RPATH "$ORIGIN")
else()
    # macOS equivalent of a loader-relative search path.
    set_property(TARGET pyarb PROPERTY INSTALL_RPATH "@loader_path")
endif()

# pyarb_obj already names these dependencies, but CMake requires them to be
# repeated for pyarb itself.
target_link_libraries(pyarb PRIVATE arbor arborenv arborio pybind11::module)

# In addition to INSTALL_RPATH, append the directories of linked libraries
# that lie outside the project to the installed RPATH.
set_property(TARGET pyarb PROPERTY INSTALL_RPATH_USE_LINK_PATH TRUE)

# Add support for mpi4py if available.
if (ARB_WITH_MPI)
find_python_module(mpi4py)
Expand Down

0 comments on commit e6b78db

Please sign in to comment.