From aa39465188c6c0c29ebac0fd3e315d17473dcfee Mon Sep 17 00:00:00 2001 From: Stephen Sachs Date: Thu, 21 Mar 2024 20:45:05 +0100 Subject: [PATCH] Re enable aws pcluster buildcache stack (#38931) * Changes to re-enable aws-pcluster pipelines - Use compilers from pre-installed spack store such that compiler path relocation works when downloading from buildcache. - Install gcc from hash so there is no risk of building gcc from source in pipeline. - `packages.yaml` files are now part of the pipelines. - No more external `postinstall.sh`. The necessary steps are in `setup-pcluster.sh` and will be version controlled within this repo. - Re-enable pipelines. * Add and * Debugging output & mv skylake -> skylake_avx512 * Explicitly check for packages * Handle case with no intel compiler * compatibility when using setup-pcluster.sh on a pre-installed cluster. * Disable palace as parser cannot read require clause at the moment * ifort cannot build superlu in buildcache `ifort` is unable to handle such long file names as used when cmake compiles test programs inside build cache. * Fix spack commit for intel compiler installation * Need to fetch other commits before using them * fix style * Add TODO * Update packages.yaml to not use 'compiler:', 'target:' or 'provider:' Synchronize with changes in https://github.com/spack/spack-configs/blob/main/AWS/parallelcluster/ * Use Intel compiler from later version (orig commit no longer found) * Use envsubst to deal with quoted newlines This is cleaner than the `eval` command used. * Need to fetch tags for checkout on version number * Intel compiler needs to be from version that has compatible DB * Install intel compiler with commit that has DB ver 7 * Decouple the intel compiler installation from current commit - Use a completely different spack installation such that this current pipeline commit remains untouched. - Make the script succeed even if the compiler installation fails (e.g. 
because the Database version has been updated) - Make the install targets fall back to gcc in case the compiler did not install correctly. * Use generic target for x86_64_vX There is no way to provision a skylake/icelake/zen runner. They are all in the same pools under x86_64_v3 and x86_64_v4. * Find the intel compiler in the current spack installation * Remove SPACK_TARGET_ARCH * Fix virtual package index & use package.yaml for intel compiler * Use only one stack & pipeline per generic architecture * Fix yaml format * Cleanup typos * Include fix for ifx.cfg to get the right gcc toolchain when linking * [removeme] Adding timeout to debug hang in make (palace) * Revert "[removeme] Adding timeout to debug hang in make (palace)" This reverts commit fee8a01580489a4ea364368459e9353b46d0d7e2. * palace x86_64_v4 gets stuck when compiling try newer oneapi * Update comment * Use the latest container image * Update gcc_hashes to match new container * Use only one tag providing tags per extends call Also removed an unnecessary tag. 
* Move generic setup script out of individual stack * Cleanup from last commit * Enable checking signature for packages available on the container * Remove commented packages / Add comment for palace * Enable openmpi@5 which needs pmix>3 * don't look for intel compiler on aarch64 --- .../gitlab/cloud_pipelines/.gitlab-ci.yml | 132 +++++------------- .../configs/linux/neoverse_n1/ci.yaml | 7 - .../configs/linux/neoverse_v1/ci.yaml | 3 - .../configs/linux/skylake_avx512/ci.yaml | 11 -- .../linux/{icelake => x86_64_v4}/ci.yaml | 3 - .../scripts/pcluster/setup-pcluster.sh | 130 +++++++++++++++++ .../stacks/aws-pcluster-icelake/spack.yaml | 56 -------- .../aws-pcluster-neoverse_n1/spack.yaml | 58 -------- .../aws-pcluster-neoverse_v1/packages.yaml | 64 +++++++++ .../aws-pcluster-neoverse_v1/spack.yaml | 39 +----- .../stacks/aws-pcluster-skylake/spack.yaml | 56 -------- .../aws-pcluster-x86_64_v4/packages.yaml | 110 +++++++++++++++ .../stacks/aws-pcluster-x86_64_v4/spack.yaml | 32 +++++ 13 files changed, 381 insertions(+), 320 deletions(-) delete mode 100644 share/spack/gitlab/cloud_pipelines/configs/linux/neoverse_n1/ci.yaml delete mode 100644 share/spack/gitlab/cloud_pipelines/configs/linux/skylake_avx512/ci.yaml rename share/spack/gitlab/cloud_pipelines/configs/linux/{icelake => x86_64_v4}/ci.yaml (87%) create mode 100755 share/spack/gitlab/cloud_pipelines/scripts/pcluster/setup-pcluster.sh delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/aws-pcluster-icelake/spack.yaml delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/aws-pcluster-neoverse_n1/spack.yaml create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/aws-pcluster-neoverse_v1/packages.yaml delete mode 100644 share/spack/gitlab/cloud_pipelines/stacks/aws-pcluster-skylake/spack.yaml create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/aws-pcluster-x86_64_v4/packages.yaml create mode 100644 share/spack/gitlab/cloud_pipelines/stacks/aws-pcluster-x86_64_v4/spack.yaml diff --git 
a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml index 19169ae79d463c..641bde58d8fc47 100644 --- a/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml +++ b/share/spack/gitlab/cloud_pipelines/.gitlab-ci.yml @@ -39,20 +39,10 @@ default: SPACK_TARGET_PLATFORM: "linux" SPACK_TARGET_ARCH: "x86_64_v3" -.linux_skylake: +.linux_x86_64_v4: variables: SPACK_TARGET_PLATFORM: "linux" - SPACK_TARGET_ARCH: "skylake_avx512" - -.linux_icelake: - variables: - SPACK_TARGET_PLATFORM: "linux" - SPACK_TARGET_ARCH: "icelake" - -.linux_neoverse_n1: - variables: - SPACK_TARGET_PLATFORM: "linux" - SPACK_TARGET_ARCH: "neoverse_n1" + SPACK_TARGET_ARCH: "x86_64_v4" .linux_neoverse_v1: variables: @@ -815,104 +805,58 @@ deprecated-ci-build: job: deprecated-ci-generate ######################################## -# AWS PCLUSTER +# AWS ParallelCluster ######################################## -.aws-pcluster-generate-image: - image: { "name": "ghcr.io/spack/pcluster-amazonlinux-2:v2023-05-25", "entrypoint": [""] } - .aws-pcluster-generate: + image: { "name": "ghcr.io/spack/pcluster-amazonlinux-2:v2024-01-29", "entrypoint": [""] } before_script: - # Use gcc from local container buildcache + # Use gcc from pre-installed spack store - - . "./share/spack/setup-env.sh" - - . 
/etc/profile.d/modules.sh - - spack buildcache rebuild-index /bootstrap/local-cache/ - - spack mirror add local-cache /bootstrap/local-cache - - spack gpg trust /bootstrap/public-key - - cd "${CI_PROJECT_DIR}" && curl -sOL https://raw.githubusercontent.com/spack/spack-configs/main/AWS/parallelcluster/postinstall.sh - - sed -i -e "s/spack arch -t/echo ${SPACK_TARGET_ARCH}/g" postinstall.sh - - sed -i.bkp s/"spack install gcc"/"spack install --cache-only --reuse gcc"/ postinstall.sh - - diff postinstall.sh postinstall.sh.bkp || echo Done - - /bin/bash postinstall.sh -fg - - spack config --scope site add "packages:all:target:[${SPACK_TARGET_ARCH}]" - after_script: - - - mv "${CI_PROJECT_DIR}/postinstall.sh" "${CI_PROJECT_DIR}/jobs_scratch_dir/" - -# Icelake (one pipeline per target) -.aws-pcluster-icelake: - variables: - SPACK_CI_STACK_NAME: aws-pcluster-icelake - -# aws-pcluster-generate-icelake: -# extends: [ ".linux_icelake", ".aws-pcluster-icelake", ".generate-x86_64", ".tags-x86_64_v4", ".aws-pcluster-generate", ".aws-pcluster-generate-image" ] - -# aws-pcluster-build-icelake: -# extends: [ ".linux_icelake", ".aws-pcluster-icelake", ".build" ] -# trigger: -# include: -# - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml -# job: aws-pcluster-generate-icelake -# strategy: depend -# needs: -# - artifacts: True -# job: aws-pcluster-generate-icelake - -# Skylake_avx512 (one pipeline per target) -.aws-pcluster-skylake: - variables: - SPACK_CI_STACK_NAME: aws-pcluster-skylake - -# aws-pcluster-generate-skylake: -# extends: [ ".linux_skylake", ".aws-pcluster-skylake", ".generate-x86_64", ".tags-x86_64_v4", ".aws-pcluster-generate", ".aws-pcluster-generate-image" ] - -# aws-pcluster-build-skylake: -# extends: [ ".linux_skylake", ".aws-pcluster-skylake", ".build" ] -# trigger: -# include: -# - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml -# job: aws-pcluster-generate-skylake -# strategy: depend -# needs: -# - artifacts: True -# job: aws-pcluster-generate-skylake + - . 
"/etc/profile.d/modules.sh" + - diff -q "/bootstrap/cloud_pipelines-config.yaml" "share/spack/gitlab/cloud_pipelines/configs/config.yaml" || echo "WARNING Install tree might have changed. You need to rebuild the pcluster-amazonlinux-2 container in spack/gitlab-runners." + - cp "share/spack/gitlab/cloud_pipelines/configs/config.yaml" "etc/spack/" + - /bin/bash "${SPACK_ROOT}/share/spack/gitlab/cloud_pipelines/scripts/pcluster/setup-pcluster.sh" + - rm "etc/spack/config.yaml" -# Neoverse_n1 (one pipeline per target) -.aws-pcluster-neoverse_n1: +# X86_64_V4 (one pipeline per target) +.aws-pcluster-x86_64_v4: variables: - SPACK_CI_STACK_NAME: aws-pcluster-neoverse_n1 + SPACK_CI_STACK_NAME: aws-pcluster-x86_64_v4 -# aws-pcluster-generate-neoverse_n1: -# extends: [ ".linux_neoverse_n1", ".aws-pcluster-neoverse_n1", ".generate-aarch64", ".aws-pcluster-generate", ".aws-pcluster-generate-image" ] +aws-pcluster-generate-x86_64_v4: + extends: [ ".linux_x86_64_v4", ".aws-pcluster-x86_64_v4", ".generate-base", ".tags-x86_64_v4", ".aws-pcluster-generate"] -# aws-pcluster-build-neoverse_n1: -# extends: [ ".linux_neoverse_n1", ".aws-pcluster-neoverse_n1", ".build" ] -# trigger: -# include: -# - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml -# job: aws-pcluster-generate-neoverse_n1 -# strategy: depend -# needs: -# - artifacts: True -# job: aws-pcluster-generate-neoverse_n1 +aws-pcluster-build-x86_64_v4: + extends: [ ".linux_x86_64_v4", ".aws-pcluster-x86_64_v4", ".build" ] + trigger: + include: + - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml + job: aws-pcluster-generate-x86_64_v4 + strategy: depend + needs: + - artifacts: True + job: aws-pcluster-generate-x86_64_v4 # Neoverse_v1 (one pipeline per target) .aws-pcluster-neoverse_v1: variables: SPACK_CI_STACK_NAME: aws-pcluster-neoverse_v1 -# aws-pcluster-generate-neoverse_v1: -# extends: [ ".linux_neoverse_v1", ".aws-pcluster-neoverse_v1", ".generate-aarch64", ".aws-pcluster-generate", ".aws-pcluster-generate-image" ] 
+aws-pcluster-generate-neoverse_v1: + # TODO: Use updated runner tags: https://github.com/spack/spack-infrastructure/pull/694/files + extends: [ ".linux_neoverse_v1", ".aws-pcluster-neoverse_v1", ".generate-neoverse_v1", ".aws-pcluster-generate"] -# aws-pcluster-build-neoverse_v1: -# extends: [ ".linux_neoverse_v1", ".aws-pcluster-neoverse_v1", ".build" ] -# trigger: -# include: -# - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml -# job: aws-pcluster-generate-neoverse_v1 -# strategy: depend -# needs: -# - artifacts: True -# job: aws-pcluster-generate-neoverse_v1 +aws-pcluster-build-neoverse_v1: + extends: [ ".linux_neoverse_v1", ".aws-pcluster-neoverse_v1", ".build" ] + trigger: + include: + - artifact: jobs_scratch_dir/cloud-ci-pipeline.yml + job: aws-pcluster-generate-neoverse_v1 + strategy: depend + needs: + - artifacts: True + job: aws-pcluster-generate-neoverse_v1 # Cray definitions .generate-cray: diff --git a/share/spack/gitlab/cloud_pipelines/configs/linux/neoverse_n1/ci.yaml b/share/spack/gitlab/cloud_pipelines/configs/linux/neoverse_n1/ci.yaml deleted file mode 100644 index 9ba2680702bc07..00000000000000 --- a/share/spack/gitlab/cloud_pipelines/configs/linux/neoverse_n1/ci.yaml +++ /dev/null @@ -1,7 +0,0 @@ -ci: - pipeline-gen: - - any-job: - variables: - SPACK_TARGET_ARCH: neoverse_n1 - - build-job: - tags: ["aarch64", "graviton2"] diff --git a/share/spack/gitlab/cloud_pipelines/configs/linux/neoverse_v1/ci.yaml b/share/spack/gitlab/cloud_pipelines/configs/linux/neoverse_v1/ci.yaml index e874fc6522fb27..82aa1eae7bfc0d 100644 --- a/share/spack/gitlab/cloud_pipelines/configs/linux/neoverse_v1/ci.yaml +++ b/share/spack/gitlab/cloud_pipelines/configs/linux/neoverse_v1/ci.yaml @@ -1,7 +1,4 @@ ci: pipeline-gen: - - any-job: - variables: - SPACK_TARGET_ARCH: neoverse_v1 - build-job: tags: ["aarch64", "graviton3"] diff --git a/share/spack/gitlab/cloud_pipelines/configs/linux/skylake_avx512/ci.yaml 
b/share/spack/gitlab/cloud_pipelines/configs/linux/skylake_avx512/ci.yaml deleted file mode 100644 index 0a7bbb6f19d186..00000000000000 --- a/share/spack/gitlab/cloud_pipelines/configs/linux/skylake_avx512/ci.yaml +++ /dev/null @@ -1,11 +0,0 @@ -ci: - pipeline-gen: - - any-job: - variables: - SPACK_TARGET_ARCH: skylake_avx512 - - build-job: - before_script: - - - curl -LfsS "https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz" -o gmake.tar.gz - - printf "fef1f59e56d2d11e6d700ba22d3444b6e583c663d6883fd0a4f63ab8bd280f0f gmake.tar.gz" | sha256sum --check --strict --quiet - - tar -xzf gmake.tar.gz -C /usr bin/make 2> /dev/null - tags: ["x86_64_v4"] diff --git a/share/spack/gitlab/cloud_pipelines/configs/linux/icelake/ci.yaml b/share/spack/gitlab/cloud_pipelines/configs/linux/x86_64_v4/ci.yaml similarity index 87% rename from share/spack/gitlab/cloud_pipelines/configs/linux/icelake/ci.yaml rename to share/spack/gitlab/cloud_pipelines/configs/linux/x86_64_v4/ci.yaml index 036a4419464304..ae14967dd5a04c 100644 --- a/share/spack/gitlab/cloud_pipelines/configs/linux/icelake/ci.yaml +++ b/share/spack/gitlab/cloud_pipelines/configs/linux/x86_64_v4/ci.yaml @@ -1,8 +1,5 @@ ci: pipeline-gen: - - any-job: - variables: - SPACK_TARGET_ARCH: icelake - build-job: before_script: - - curl -LfsS "https://github.com/JuliaBinaryWrappers/GNUMake_jll.jl/releases/download/GNUMake-v4.3.0+1/GNUMake.v4.3.0.x86_64-linux-gnu.tar.gz" -o gmake.tar.gz diff --git a/share/spack/gitlab/cloud_pipelines/scripts/pcluster/setup-pcluster.sh b/share/spack/gitlab/cloud_pipelines/scripts/pcluster/setup-pcluster.sh new file mode 100755 index 00000000000000..dfd5af1b437efc --- /dev/null +++ b/share/spack/gitlab/cloud_pipelines/scripts/pcluster/setup-pcluster.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# +# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. 
See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) +set -e + +# Intel compiler needs to be installed from a specific spack git commit. +# The best solution would be to have the compilers hash (or packages contents) be part of the +# individual packages hashes. I don't see this at the moment. +# Set to the latest tag including a recent oneapi compiler. +spack_intel_compiler_commit="develop-2023-08-06" + +set_pcluster_defaults() { + # Set versions of pre-installed software in packages.yaml + [ -z "${SLURM_VERSION}" ] && SLURM_VERSION=$(strings /opt/slurm/lib/libslurm.so | grep -e '^VERSION' | awk '{print $2}' | sed -e 's?"??g') + [ -z "${LIBFABRIC_VERSION}" ] && LIBFABRIC_VERSION=$(awk '/Version:/{print $2}' "$(find /opt/amazon/efa/ -name libfabric.pc | head -n1)" | sed -e 's?~??g' -e 's?amzn.*??g') + export SLURM_VERSION LIBFABRIC_VERSION + + envsubst < "${SPACK_ROOT}/share/spack/gitlab/cloud_pipelines/stacks/${SPACK_CI_STACK_NAME}/packages.yaml" > "${SPACK_ROOT}"/etc/spack/packages.yaml +} + +setup_spack() { + spack compiler add --scope site + spack external find --scope site + # Remove all autotools/buildtools packages. These versions need to be managed by spack or it will + # eventually end up in a version mismatch (e.g. when compiling gmp). + spack tags build-tools | xargs -I {} spack config --scope site rm packages:{} +} + +patch_compilers_yaml() { + # Graceful exit if package not found by spack + set -o pipefail + compilers_yaml="${SPACK_ROOT}/etc/spack/compilers.yaml" + [ -f "${compilers_yaml}" ] || { + echo "Cannot find ${compilers_yaml}, compiler setup might now be optimal." + return + } + + # System ld is too old for amzn linux2 + spack_gcc_version=$(spack find --format '{version}' gcc) + binutils_path=$(spack find -p binutils | awk '/binutils/ {print $2}' | head -n1) + if [ -d "${binutils_path}" ] && [ -n "${spack_gcc_version}" ]; then python3 <