Skip to content

Commit

Permalink
GH artifacts based CI (#1043)
Browse files Browse the repository at this point in the history
  • Loading branch information
sandeepd-nv authored Sep 26, 2023
1 parent 443f751 commit 31aa336
Show file tree
Hide file tree
Showing 17 changed files with 458 additions and 212 deletions.
48 changes: 48 additions & 0 deletions .github/actions/download-artifacts/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Composite action that obtains prebuilt legate.core conda artifacts for one
# device flavor and one legate.core commit, then stages them under
# /home/coder/.artifacts.
#
# Flow: restore `.artifacts` from the actions cache; on a cache miss, download
# the matching artifact from the nv-legate/legate.core CI run and move the
# `legate_core` directory and `environment*.yaml` files into `.artifacts`;
# finally copy `.artifacts` into the coder user's home directory.
#
# NOTE(review): the name below does not match this action's directory
# (download-artifacts) — it looks copied from another action; confirm before
# renaming.
name: setup-legate-conda

description: Download dependencies (artifacts)

# Action metadata inputs accept `description`, `required`, `default` and
# `deprecationMessage`; `type` is only meaningful for workflow inputs.
inputs:
  device:
    description: >-
      Device flavor of the legate.core build to fetch. Used in the cache key,
      the upstream workflow file name and the artifact name.
    required: true
  git_sha:
    description: >-
      legate.core commit whose CI artifacts are downloaded. Also part of the
      cache key.
    required: true

runs:
  using: composite
  steps:

    # One cache entry per (legate.core commit, device) pair.
    - id: cache
      name: Cache conda artifacts
      uses: actions/cache@v3
      with:
        key: "nv-legate/legate.core@${{ inputs.git_sha }}-${{ inputs.device }}"
        path: .artifacts

    # Cache miss only: pull the artifact produced by a successful upstream run
    # for exactly this commit. The artifact name is matched as a regexp.
    - if: steps.cache.outputs.cache-hit != 'true'
      name: Download conda artifacts
      uses: dawidd6/action-download-artifact@v2
      with:
        path: .artifacts-dl
        repo: nv-legate/legate.core
        commit: "${{ inputs.git_sha }}"
        workflow_conclusion: success
        workflow: "ci-gh-${{ inputs.device }}-build-and-test.yml"
        name: "legate.core-${{ inputs.device }}-[0-9a-z]{40}"
        name_is_regexp: true

    # Cache miss only: flatten the downloaded tree into the cached directory.
    - if: steps.cache.outputs.cache-hit != 'true'
      name: Move conda artifacts into cached dir
      shell: bash --noprofile --norc -xeo pipefail {0}
      env:
        # Hand the input to the script as data instead of expanding it into
        # the script text, so its value can never be parsed as shell syntax.
        DEVICE: "${{ inputs.device }}"
      run: |
        mkdir -p .artifacts;
        find .artifacts-dl/legate.core-"${DEVICE}"-*/ \
          -maxdepth 2 -type d -name legate_core -exec mv {} .artifacts/ \;
        find .artifacts-dl/legate.core-"${DEVICE}"-*/ \
          -maxdepth 2 -type f -name "environment*.yaml" -exec mv {} .artifacts/ \;

    # No `if:` here: this runs on both cache hit and cache miss.
    - name: Copy and change cache dir ownership
      shell: bash --noprofile --norc -xeo pipefail {0}
      run: |
        # Copy and change directory ownership
        cp -ar .artifacts /home/coder/.artifacts;
        chown -R coder:coder /home/coder/.artifacts;
        ls -R /home/coder/.artifacts
11 changes: 7 additions & 4 deletions .github/workflows/ci-gh.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@ jobs:
fail-fast: false
matrix:
include:
- {build-target: cpu}
- {build-target: gpu}
- device: "gpu"
image: "rapidsai/devcontainers:23.06-cpp-mambaforge-ubuntu22.04"

- device: "cpu"
image: "rapidsai/devcontainers:23.06-cpp-mambaforge-ubuntu22.04"
uses:
./.github/workflows/gh-build-and-test.yml
with:
build-target: ${{ matrix.build-target }}
sha: ${{ github.sha }}
device: ${{ matrix.device }}
image: ${{ matrix.image }}
88 changes: 74 additions & 14 deletions .github/workflows/gh-build-and-test.yml
Original file line number Diff line number Diff line change
@@ -1,32 +1,92 @@
# Reusable workflow (workflow_call) that builds and then tests for one device.
# NOTE(review): this listing is a rendered diff whose +/- markers were lost, so
# superseded lines (e.g. the `build-target` input, the `cleanup` job and its
# gh-cleanup.yml reference) appear next to their replacements — confirm against
# the real file before editing.
on:
workflow_call:
inputs:
build-target:
required: true
image:
type: string
sha:
required: true
device:
type: string
required: true


jobs:
# Build job: delegates to the reusable gh-build.yml workflow.
build:
name: "Build cunumeric (with ${{ inputs.build-target }} legate) on GH"
name: "Build cunumeric (with ${{ inputs.device }} legate) on GH"
uses:
./.github/workflows/gh-build.yml
with:
build-target: ${{ inputs.build-target }}
# Ref: https://docs.rapids.ai/resources/github-actions/#cpu-labels for `linux-amd64-cpu4`
runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-cpu4' || 'ubuntu-latest' }}
sha: ${{ inputs.sha }}
device: ${{ inputs.device }}
image: ${{ inputs.image }}
runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-32cpu' || 'ubuntu-latest' }}

cleanup:
test:
needs:
- build
strategy:
fail-fast: false
matrix:
include:
# Each entry below is one test configuration. Its name, options, runner,
# has-gpu and enabled fields are forwarded to gh-test.yml through the `with:`
# mapping at the end of this job.
# Where `runner` is an expression, it picks the V100 GPU runner when
# inputs.device is 'gpu' and a CPU runner otherwise.
- name: 1 CPU test
options: test --cpus 1 --unit --debug
runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }}
has-gpu: false
enabled: true

- name: 2 CPUs test
options: test --cpus 2 --debug
runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu8' }}
has-gpu: false
enabled: true

# The two GPU entries are enabled only when inputs.device is 'gpu'.
- name: GPU test
options: test --use cuda --gpus 1 --debug
runner: linux-amd64-gpu-v100-latest-1
has-gpu: true
enabled: ${{ inputs.device == 'gpu' }}

- name: 2 GPUs test
options: test --use cuda --gpus 2 --debug
runner: linux-amd64-2gpu
has-gpu: true
enabled: ${{ inputs.device == 'gpu' }}

# Both OpenMP entries are currently switched off (enabled: false).
- name: OpenMP test
options: test --use openmp --omps 1 --ompthreads 2 --debug
runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }}
has-gpu: ${{ inputs.device == 'gpu' }}
enabled: false

- name: 2 NUMA OpenMPs test
options: test --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug
runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }}
has-gpu: ${{ inputs.device == 'gpu' }}
enabled: false

- name: Eager execution test
options: test --use eager --debug
runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }}
has-gpu: ${{ inputs.device == 'gpu' }}
enabled: true

- name: mypy
options: mypy
runner: linux-amd64-cpu4
has-gpu: false
enabled: true

# Documentation is enabled only when inputs.device is 'gpu'.
- name: documentation
options: docs
runner: linux-amd64-32cpu
has-gpu: false
enabled: ${{ inputs.device == 'gpu' }}

# This ensures the cleanup job runs even if previous jobs fail or the workflow is cancelled.
if: always()
uses:
./.github/workflows/gh-cleanup.yml
./.github/workflows/gh-test.yml
with:
build-target: ${{ inputs.build-target }}
sha: ${{ inputs.sha }}
name: ${{ matrix.name }}
device: ${{ inputs.device }}
image: ${{ inputs.image }}
runs-on: ${{ matrix.runner }}
has-gpu: ${{ matrix.has-gpu }}
test-options: ${{ matrix.options }}
enabled: ${{ matrix.enabled }}
148 changes: 63 additions & 85 deletions .github/workflows/gh-build.yml
Original file line number Diff line number Diff line change
@@ -1,123 +1,101 @@
# Reusable build workflow (workflow_call).
# NOTE(review): this listing is a rendered diff whose +/- markers were lost, so
# superseded lines (the docker-image based build: `docker ...` commands,
# IMAGE_NAME_* variables, `build-target` input) are interleaved with their
# replacements (the in-container build) — confirm against the real file before
# editing.
name: Build cunumeric on GH
name: Build

on:
workflow_call:
inputs:
build-target:
required: true
image:
type: string
runs-on:
required: true
device:
required: true
type: string
sha:
runs-on:
required: true
type: string

env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BASE_IMAGE: rapidsai/devcontainers:23.06-cpp-cuda11.8-mambaforge-ubuntu22.04
IMAGE_NAME_LEGATE: legate.core-${{ inputs.build-target }}
IMAGE_NAME_CUNUMERIC: cunumeric-${{ inputs.build-target }}
USE_CUDA: ${{ (inputs.build-target == 'cpu' && 'OFF') || 'ON' }}

jobs:
build:
name: build-${{ inputs.build-target }}-sub-workflow
name: build-${{ inputs.device }}-sub-workflow

permissions:
id-token: write # This is required for configure-aws-credentials
contents: read # This is required for actions/checkout
packages: write # This is required to push docker image to ghcr.io


runs-on: ${{ inputs.runs-on }}

steps:
- name: Checkout legate.core
uses: actions/checkout@v3
with:
repository: nv-legate/legate.core
fetch-depth: 0
path: legate
# The job runs inside the image given by inputs.image, started as root
# (`-u root`); the env entries below are set for that container.
container:
options: -u root
image: "${{ inputs.image }}"
env:
CUDA_VERSION: "12.0"
CUDA_VERSION_MAJOR: "12"
CUDA_VERSION_MINOR: "0"
SCCACHE_REGION: "us-east-2"
SCCACHE_BUCKET: "rapids-sccache-devs"
SCCACHE_S3_KEY_PREFIX: "legate-cunumeric-dev"
# ON when inputs.device is 'gpu', OFF otherwise.
USE_CUDA: "${{ inputs.device == 'gpu' && 'ON' || 'OFF' }}"
GH_TOKEN: "${{ env.GH_TOKEN }}"
GITHUB_TOKEN: "${{ env.GITHUB_TOKEN }}"
# Set to the vault URL only when the repository owner is NOT nv-legate;
# empty otherwise.
VAULT_HOST: "${{ github.repository_owner != 'nv-legate' && 'https://vault.ops.k8s.rapids.ai' || '' }}"
# Unless a step sets its own `shell`, run steps execute through `su coder`
# with /home/coder as the working directory.
defaults:
run:
shell: su coder {0}
working-directory: /home/coder

steps:
- name: Checkout cunumeric (= this repo)
uses: actions/checkout@v3
with:
fetch-depth: 0
path: cunumeric
persist-credentials: false

# AWS credentials are requested only when the repository owner is nv-legate.
- if: github.repository_owner == 'nv-legate'
name: Get AWS credentials for sccache bucket
uses: aws-actions/configure-aws-credentials@v2
with:
aws-region: us-east-2
role-duration-seconds: 28800 # 8 hours
role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-nv-legate

- name: Docker system prune
- name: Dump environment
run: |
docker version
docker system prune --all --force
env
- name: Build legate.core using docker build
- name: Copy source folder
run: |
echo BUILD_TARGET: ${{ inputs.build-target }}
echo USE_CUDA: ${{ env.USE_CUDA }}
export LEGATE_SHA=$(cat cunumeric/cmake/versions.json | jq -r '.packages.legate_core.git_tag')
echo "Checking out LEGATE_SHA: ${LEGATE_SHA}"
git -C legate checkout $LEGATE_SHA
IMAGE_TAG_LEGATE=${{ env.IMAGE_NAME_LEGATE }}:${{ inputs.sha }}
chmod +x legate/continuous_integration/build-docker-image
legate/continuous_integration/build-docker-image \
--base-image "$BASE_IMAGE" \
--image-tag "$IMAGE_TAG_LEGATE" \
--source-dir legate
- name: Build cunumeric using docker build
run: |
IMAGE_TAG_CUNUMERIC=${{ env.IMAGE_NAME_CUNUMERIC }}:${{ inputs.sha }}
IMAGE_TAG_LEGATE=${{ env.IMAGE_NAME_LEGATE }}:${{ inputs.sha }}
legate/continuous_integration/build-docker-image \
--base-image "$IMAGE_TAG_LEGATE" \
--image-tag "$IMAGE_TAG_CUNUMERIC" \
--source-dir cunumeric
- name: Dump docker history of image before upload
set -x
pwd
cp -r $GITHUB_WORKSPACE/cunumeric .
chown -R coder:coder cunumeric;
ls -R
- name: Copy .gitconfig
run: cp ~/cunumeric/continuous_integration/dot-gitconfig ~/.gitconfig

# Reads the legate_core git_tag from cunumeric/cmake/versions.json and appends
# it to GITHUB_OUTPUT as `git_tag`; the download-artifacts step consumes it as
# steps.legate_core_info.outputs.git_tag. This step sets its own bash shell
# instead of the job default.
- id: legate_core_info
name: Read legate.core SHA
shell: bash --noprofile --norc -xeo pipefail {0}
run: |
IMAGE_TAG=${{ env.IMAGE_NAME_CUNUMERIC }}:${{ inputs.sha }}
docker history $IMAGE_TAG
- name: Log in to container image registry
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin

- name: Push cunumeric image
run: |
IMAGE_TAG=${{ env.IMAGE_NAME_CUNUMERIC }}:${{ inputs.sha }}
IMAGE_ID=ghcr.io/${{ github.repository_owner }}
git_tag="$(jq -r '.packages.legate_core.git_tag' cunumeric/cmake/versions.json)";
# Change all uppercase to lowercase
IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]')
echo "git_tag=$git_tag" | tee -a "${GITHUB_OUTPUT}";
IMAGE_ID=$IMAGE_ID/$IMAGE_TAG
- name: Download dependencies (artifacts)
uses: ./cunumeric/.github/actions/download-artifacts
with:
device: "${{ inputs.device }}"
git_sha: "${{ steps.legate_core_info.outputs.git_tag }}"

docker tag $IMAGE_TAG $IMAGE_ID
docker push $IMAGE_ID
- if: github.repository_owner == 'nv-legate'
name: Get AWS credentials for sccache bucket
uses: aws-actions/configure-aws-credentials@v2
with:
aws-region: us-east-2
role-duration-seconds: 28800 # 8 hours
role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-nv-legate

- name: Copy artifacts back to the host
- name: Build cunumeric
run: |
IMAGE_TAG=${{ env.IMAGE_NAME_CUNUMERIC }}:${{ inputs.sha }}
mkdir -p artifacts
docker run -v "$(pwd)/artifacts:/home/coder/.artifacts" --rm -t $IMAGE_TAG copy-artifacts
- name: Display structure of workdir
run: ls -R
export PATH="/home/coder/cunumeric/continuous_integration/scripts:$PATH"
build-cunumeric-all
- name: Upload build artifacts
uses: actions/upload-artifact@v3
with:
name: "cunumeric-${{ inputs.build-target }}-${{ inputs.sha }}"
path: artifacts
name: "cunumeric-${{ inputs.device }}-${{ github.sha }}"
path: |
/tmp/out
/tmp/conda-build
Loading

0 comments on commit 31aa336

Please sign in to comment.