nv-legate · sandeepd-nv · Jul 20, 2023 · Aug 31, 2023 · Sep 4, 2023 · marcinz
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml
@@ -16,15 +16,97 @@ jobs:
     with:
       build-target: ${{ inputs.build-target }}
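-      # Ref: https://docs.rapids.ai/resources/github-actions/#cpu-labels for `linux-amd64-cpu4`
+      # Ref: https://docs.rapids.ai/resources/github-actions/#cpu-labels for the `linux-amd64-*` CPU runner labels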
-      runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-cpu4' || 'ubuntu-latest' }}
+      runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-32cpu' || 'ubuntu-latest' }}
       sha: ${{ inputs.sha }}
 
-  cleanup:
+  test:  # Fans the built image out over the test/analysis configurations below.
     needs:
       - build
+    strategy:
+      fail-fast: false  # A failing configuration must not cancel the others.
+      matrix:
+        include:  # One entry per call of gh-test.yml; see the `with:` mapping below.
+          - name: 1 CPU test
+            enabled: true
+            runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }}
+            has-gpu: false
+            options: test --cpus 1 --unit --debug
+            log: cpu
+
+          - name: 2 CPUs test
+            enabled: true
+            runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu8' }}
+            has-gpu: false
+            options: test --cpus 2 --debug
+            log: cpus
+
+          - name: GPU test
+            enabled: ${{ inputs.build-target == 'gpu' }}
+            runner: linux-amd64-gpu-v100-latest-1
+            has-gpu: true
+            options: test --use cuda --gpus 1 --debug
+            log: gpu
+
+          - name: 2 GPUs test
+            enabled: ${{ inputs.build-target == 'gpu' }}
+            runner: linux-amd64-2gpu
+            has-gpu: true
+            options: test --use cuda --gpus 2 --debug
+            log: gpus
+
+          - name: OpenMP test
+            enabled: false
+            runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }}
+            has-gpu: ${{ inputs.build-target == 'gpu' }}
+            options: test --use openmp --omps 1 --ompthreads 2 --debug
+            log: omp
 
-    # This ensures the cleanup job runs even if previous jobs fail or the workflow is cancelled.
-    if: always()
+          - name: 2 NUMA OpenMPs test
+            enabled: false
+            runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }}
+            has-gpu: ${{ inputs.build-target == 'gpu' }}
+            options: test --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug
+            log: omps
+
+          - name: Eager execution test
+            enabled: true
+            runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }}
+            has-gpu: ${{ inputs.build-target == 'gpu' }}
+            options: test --use eager --debug
+            log: eager
+
+          - name: mypy
+            enabled: true
+            runner: linux-amd64-cpu4
+            has-gpu: false
+            options: mypy
+            log: mypy
+
+          - name: documentation
+            enabled: ${{ inputs.build-target == 'gpu' }}
+            runner: linux-amd64-32cpu
+            has-gpu: false
+            options: docs
+            log: docs
+    # Each matrix entry calls the reusable test workflow, whose job is skipped when `enabled` is false.
+    uses:
+      ./.github/workflows/gh-test.yml
+    with:
+      name: ${{ matrix.name }}
+      enabled: ${{ matrix.enabled }}
+      build-target: ${{ inputs.build-target }}
+      sha: ${{ inputs.sha }}
+      runs-on: ${{ matrix.runner }}
+      has-gpu: ${{ matrix.has-gpu }}
+      test-options: ${{ matrix.options }}
+      log-name: ${{ matrix.log }}
+
+  # always() keeps cleanup running when build/test fail or the run is cancelled; skip-cleanup still opts out.
+  cleanup:
+    if: always() && inputs.skip-cleanup == false
+    needs:
+      - build
+      - test
     uses:
       ./.github/workflows/gh-cleanup.yml
     with:

diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml
@@ -0,0 +1,65 @@
+name: Test cunumeric on GH
+
+on:
+  workflow_call:  # Reusable workflow: every input below is supplied by the caller.
+    inputs:
+      name:  # Display name of the test job.
+        type: string
+        required: true
+      build-target:  # Selects the container image cunumeric-<build-target>.
+        type: string
+        required: true
+      runs-on:  # Runner label the test job is scheduled on.
+        type: string
+        required: true
+      has-gpu:  # Gates the nvidia-smi sanity-check step.
+        type: boolean
+        required: true
+        description: "The runner has GPU(s)."
+      sha:  # Container image tag; also part of the log and artifact names.
+        type: string
+        required: true
+      test-options:  # Arguments handed to test-cunumeric.
+        type: string
+        required: true
+      log-name:  # Suffix that tells this run's log file and artifact apart.
+        type: string
+        required: true
+      enabled:  # The test job is skipped when this is false.
+        type: boolean
+        required: true
+
+jobs:
+  test:  # Runs test-cunumeric inside the cunumeric image tagged with inputs.sha.
+    name: ${{ inputs.name }}
+    if: inputs.enabled && github.repository_owner == 'nv-legate'
+    runs-on: ${{ inputs.runs-on }}
+    container:
+      options: -u root
+      image: ghcr.io/nv-legate/cunumeric-${{ inputs.build-target }}:${{ inputs.sha }}
+      volumes:  # Maps the container's log directory into the runner workspace.
+        - ${{ github.workspace }}/test_logs:/home/coder/.test_logs
+      env:
+        PYTHONDONTWRITEBYTECODE: 1
+        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+
+    steps:
+      - if: inputs.has-gpu
+        name: Run nvidia-smi to make sure GPU is working
+        run: nvidia-smi
+
+      - name: Run cunumeric test / analysis
+        shell: su coder {0}
+        run: |
+          set -x
+          mkdir -p ~/.test_logs
+          sudo chown -R coder:coder ~/.test_logs
+          set -eo pipefail  # tee must not mask a failing test-cunumeric
+          test-cunumeric ${{ inputs.test-options }} 2>&1 | tee ~/.test_logs/cunumeric-${{ inputs.sha }}-test-${{ inputs.log-name }}.log
+
+      - name: Upload logs
+        if: always()  # Keep the log even when the test step above failed.
+        uses: actions/upload-artifact@v3
+        with:
+          name: "cunumeric-${{ inputs.build-target }}-${{ inputs.sha }}-test-${{ inputs.log-name }}-log"
+          path: test_logs/cunumeric-${{ inputs.sha }}-test-${{ inputs.log-name }}.log
diff --git a/continuous_integration/Dockerfile b/continuous_integration/Dockerfile
@@ -30,12 +30,19 @@ ENV AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
 ARG AWS_SECRET_ACCESS_KEY
 ENV AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
 
+ENV USE_OPENMP=ON
+
 COPY --chown=coder:coder .creds /run/secrets
 
 RUN entrypoint build-cunumeric-all
 
 #---------------------------------------------------
 FROM stage0 as final
+# Install numactl as root; the apt package index is removed in the same layer so it does not bloat the image.
+USER root
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && apt-get install -y --no-install-recommends numactl && \
+    rm -rf /var/lib/apt/lists/*
 USER coder
 WORKDIR /home/coder
 

diff --git a/continuous_integration/home/coder/.local/bin/cunumeric-conda-utils b/continuous_integration/home/coder/.local/bin/cunumeric-conda-utils
@@ -0,0 +1,48 @@
+. conda-utils
+
+make_empty_conda_env() {  # Create the "${DEFAULT_CONDA_ENV:-legate}" env with no packages; -y avoids the prompt.
+    mamba create -y -n "${DEFAULT_CONDA_ENV:-legate}"
+}
+
+# Generate a conda environment YAML file via ~/legate/scripts/generate-conda-envs.py
+# for the section names given as arguments, print it, and copy it to /tmp/out.
+# `yaml_file` is deliberately not local: update_conda_env_using_section reads it after the call.
+generate_conda_env_yaml_file_for_test() {
+    local cuda_version="${CUDA_VERSION:-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}}";
+    cuda_version="$(echo "${cuda_version}" | cut -d'.' -f3 --complement)";  # drop the 3rd dot-separated field
+
+    local python_version="${PYTHON_VERSION:-}";
+    if [ -z "${python_version}" ]; then  # fall back to the version reported by python3
+        python_version="$(python3 --version 2>&1 | cut -d' ' -f2 | cut -d'.' -f3 --complement)";
+    fi
+
+    yaml_file=~/"$(         \
+        ~/legate/scripts/generate-conda-envs.py \
+            --os linux                          \
+            --compilers                         \
+            --ctk "${cuda_version}"             \
+            --python "${python_version}"        \
+            --openmpi                           \
+            --no-ucx                            \
+            --sections "$@"                     \
+        | head -n1 | cut -d' ' -f3              \
+    )"
+
+    sed -i -re "s/legate-test/${DEFAULT_CONDA_ENV:-legate}/g" "${yaml_file}";
+    echo "\"$*\" YAML file: ${yaml_file}"
+    cat "${yaml_file}"
+    mkdir -p /tmp/out
+    cp "${yaml_file}" /tmp/out
+}
+
+# Build the environment YAML for the given sections, then apply it to the
+# "${DEFAULT_CONDA_ENV:-legate}" conda environment.
+update_conda_env_using_section() {
+    local yaml_file=""  # assigned by generate_conda_env_yaml_file_for_test
+    generate_conda_env_yaml_file_for_test "$@"
+    mamba env update -n "${DEFAULT_CONDA_ENV:-legate}" -f "${yaml_file}"
+}
+
+install_cunumeric() {  # Install cunumeric into the env; the /tmp/conda-build dirs are extra channels.
+    mamba install -y -n "${DEFAULT_CONDA_ENV:-legate}" -c nvidia -c conda-forge -c /tmp/conda-build/cunumeric -c /tmp/conda-build/legate_core cunumeric
+}