From 510ea258293dfbf4c8db6f1b541f4483261dcd9d Mon Sep 17 00:00:00 2001 From: boeschf <48126478+boeschf@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:48:19 +0200 Subject: [PATCH] use uenvs ftw --- .gitlab/cscs-ci-default.yaml | 121 +++++++++-------------------------- 1 file changed, 31 insertions(+), 90 deletions(-) diff --git a/.gitlab/cscs-ci-default.yaml b/.gitlab/cscs-ci-default.yaml index 90688b4b7..4f82c5749 100644 --- a/.gitlab/cscs-ci-default.yaml +++ b/.gitlab/cscs-ci-default.yaml @@ -2,73 +2,16 @@ include: - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml' stages: - - build_base - - build_app - - build_multiarch - test -# TARGET must be any of {daint-gpu daint-mc alps-zen2 alps-a100 alps-mi200 alps-gh200 alps-mi300a} -# eiger: 2x AMD EPYC 7742 64-Core, micro-arch: zen2 -# todi: 4x gh200 72-Core + H100, micro-arch: neoverse-v2, cuda-arch: 90 - -.base_compiler: - stage: build_base - variables: - DOCKER_BUILD_ARGS: '["COMPILER=gcc@13.2"]' - -build_base_image_x86_64: - extends: [.container-builder-cscs-zen2, .dynamic-image-name, .base_compiler] - variables: - DOCKERFILE: .gitlab/docker/Dockerfile.base - WATCH_FILECHANGES: '.gitlab/docker/Dockerfile.base' - PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/arbor/baseimg-x86_64 - CSCS_BUILD_IN_MEMORY: TRUE - CSCS_REBUILD_POLICY: "if-not-exists" - DOCKER_BUILD_ARGS: '["IMG_BASE=ghcr.io/eth-cscs/docker-ci-ext/base-containers/spack-base:spack0.21.0-ubuntu22.04-cpu", "IMG_HELPER=ghcr.io/eth-cscs/docker-ci-ext/base-containers/spack-helper:ubuntu22.04-cpu", "TARGET=alps-zen2"]' - -build_base_image_aarch64: - extends: [.container-builder-cscs-gh200, .dynamic-image-name, .base_compiler] - variables: - DOCKERFILE: .gitlab/docker/Dockerfile.base - WATCH_FILECHANGES: '.gitlab/docker/Dockerfile.base' - PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/arbor/baseimg-aarch64 - CSCS_BUILD_IN_MEMORY: TRUE - CSCS_REBUILD_POLICY: "if-not-exists" - DOCKER_BUILD_ARGS: '["IMG_BASE=ghcr.io/eth-cscs/docker-ci-ext/base-containers/spack-base:spack0.21.0-ubuntu22.04-cuda12.4.1", "IMG_HELPER=ghcr.io/eth-cscs/docker-ci-ext/base-containers/spack-helper:ubuntu22.04-cuda12.4.1", "TARGET=alps-gh200"]' - -build_app_image_x86_64: - extends: .container-builder-cscs-zen2 - stage: build_app - needs: - - job: build_base_image_x86_64 - artifacts: true - variables: - DOCKERFILE: .gitlab/docker/Dockerfile.app - PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/arbor/appimg-x86_64:$CI_COMMIT_SHORT_SHA - DOCKER_BUILD_ARGS: '["BASE_IMAGE=$BASE_IMAGE", "CXX_FLAGS=-march=znver2"]' - -build_app_image_aarch64: - extends: .container-builder-cscs-gh200 - stage: build_app - needs: - - job: build_base_image_aarch64 - artifacts: true - variables: - DOCKERFILE: .gitlab/docker/Dockerfile.app - PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/arbor/appimg-aarch64:$CI_COMMIT_SHORT_SHA - DOCKER_BUILD_ARGS: '["BASE_IMAGE=$BASE_IMAGE", "CXX_FLAGS=-mcpu=neoverse-v2 -mtune=neoverse-v2", "GPU=cuda", "GPU_ARCH=90"]' - -build_multiarch_image: - extends: .make-multiarch-image - stage: build_multiarch +.uenv_image: + stage: test + image: arbor/v0.9:latest variables: - PERSIST_IMAGE_NAME_X86_64: "$CSCS_REGISTRY_PATH/arbor/appimg-x86_64:$CI_COMMIT_SHORT_SHA" - PERSIST_IMAGE_NAME_AARCH64: "$CSCS_REGISTRY_PATH/arbor/appimg-aarch64:$CI_COMMIT_SHORT_SHA" - PERSIST_IMAGE_NAME: "$CSCS_REGISTRY_PATH/arbor/appimg:$CI_COMMIT_SHORT_SHA" + UENV_VIEW: 'develop' .test_unit: - stage: test - image: $CSCS_REGISTRY_PATH/arbor/appimg:$CI_COMMIT_SHORT_SHA + extends: .uenv_image script: - cd /arbor.src - build/bin/unit-modcc @@ -90,36 +33,34 @@ build_multiarch_image: USE_MPI: "NO" test_x86_64: - extends: [.container-runner-eiger-mc, .test_unit] + extends: [.uenv-runner-eiger-mc, .test_unit] variables: SLURM_CONSTRAINT: mc test_aarch64: - extends: [.container-runner-todi-gh200, .test_unit] + extends: [.uenv-runner-daint-gh200, .test_unit] -## distributed tests don't work yet - possible problem with the gitlab runners -#.test_distributed: -# stage: test -# image: $CSCS_REGISTRY_PATH/arbor/appimg:$CI_COMMIT_SHORT_SHA -# script: -# - cd /arbor.src -# - build/bin/unit-mpi -# - scripts/run_cpp_examples.sh -d -# variables: -# SLURM_JOB_NUM_NODES: 2 -# SLURM_CPU_BIND: "verbose,rank_ldom" -# SLURM_TIMELIMIT: "00:30:00" -# USE_MPI: "YES" -# -#test_x86_64-distributed: -# extends: [.container-runner-eiger-mc, .test_distributed] -# variables: -# SLURM_CONSTRAINT: mc -# SLURM_NTASKS_PER_NODE: 8 -# -#test_aarch64-distributed: -# extends: [.container-runner-todi-gh200, .test_distributed] -# variables: -# SLURM_GPUS_PER_NODE: 4 -# SLURM_GPUS_PER_TASK: 1 -# SLURM_NTASKS_PER_NODE: 4 +.test_distributed: + extends: .uenv_image + script: + - cd /arbor.src + - build/bin/unit-mpi + - scripts/run_cpp_examples.sh -d + variables: + SLURM_JOB_NUM_NODES: 2 + SLURM_CPU_BIND: "verbose,rank_ldom" + SLURM_TIMELIMIT: "00:30:00" + USE_MPI: "YES" + +test_x86_64-distributed: + extends: [.uenv-runner-eiger-mc, .test_distributed] + variables: + SLURM_CONSTRAINT: mc + SLURM_NTASKS_PER_NODE: 8 + +test_aarch64-distributed: + extends: [.uenv-runner-daint-gh200, .test_distributed] + variables: + SLURM_GPUS_PER_NODE: 4 + SLURM_GPUS_PER_TASK: 1 + SLURM_NTASKS_PER_NODE: 4