Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gh docker reusable with tests #1022

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 0 additions & 136 deletions .github/workflows/ci.yml

This file was deleted.

90 changes: 86 additions & 4 deletions .github/workflows/gh-build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,97 @@ jobs:
with:
build-target: ${{ inputs.build-target }}
# Ref: https://docs.rapids.ai/resources/github-actions/#cpu-labels for `linux-amd64-cpu4`
runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-cpu4' || 'ubuntu-latest' }}
runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-32cpu' || 'ubuntu-latest' }}
sha: ${{ inputs.sha }}

cleanup:
# Test job: calls the reusable gh-test.yml workflow once per matrix entry,
# after the `build` job.
test:
needs:
- build
strategy:
# Let the remaining matrix entries keep running when one of them fails.
fail-fast: false
matrix:
# Each entry is forwarded through the `with:` mapping at the end of this job:
#   name    -> name          options -> test-options
#   log     -> log-name      runner  -> runs-on
#   has-gpu -> has-gpu       enabled -> enabled
include:
- name: 1 CPU test
options: test --cpus 1 --unit --debug
log: cpu
# GPU builds run on the V100 runner; other builds use the 4-CPU runner.
runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }}
has-gpu: false
enabled: true

- name: 2 CPUs test
options: test --cpus 2 --debug
log: cpus
runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu8' }}
has-gpu: false
enabled: true

- name: GPU test
options: test --use cuda --gpus 1 --debug
log: gpu
runner: linux-amd64-gpu-v100-latest-1
has-gpu: true
# Only enabled when the build target is `gpu`.
enabled: ${{ inputs.build-target == 'gpu' }}

- name: 2 GPUs test
options: test --use cuda --gpus 2 --debug
log: gpus
runner: linux-amd64-2gpu
has-gpu: true
# Only enabled when the build target is `gpu`.
enabled: ${{ inputs.build-target == 'gpu' }}

- name: OpenMP test
options: test --use openmp --omps 1 --ompthreads 2 --debug
log: omp
runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }}
has-gpu: ${{ inputs.build-target == 'gpu' }}
# Switched off for every build target.
enabled: false

# NOTE(review): the two lines below do not fit the matrix structure; they look
# like removed lines of the old cleanup job carried over from the diff view —
# confirm they are not part of the new file.
# This ensures the cleanup job runs even if previous jobs fail or the workflow is cancelled.
if: always()
- name: 2 NUMA OpenMPs test
options: test --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug
log: omps
runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }}
has-gpu: ${{ inputs.build-target == 'gpu' }}
# Switched off for every build target.
enabled: false

- name: Eager execution test
options: test --use eager --debug
log: eager
runner: ${{ inputs.build-target == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }}
has-gpu: ${{ inputs.build-target == 'gpu' }}
enabled: true

- name: mypy
options: mypy
log: mypy
runner: linux-amd64-cpu4
has-gpu: false
enabled: true

- name: documentation
options: docs
log: docs
runner: linux-amd64-32cpu
has-gpu: false
# Only enabled when the build target is `gpu`.
enabled: ${{ inputs.build-target == 'gpu' }}
# name: ${{ matrix.name }}
uses:
./.github/workflows/gh-test.yml
# Inputs of the reusable workflow; matrix values come from the entry above,
# build-target and sha are passed through from this workflow's own inputs.
with:
name: ${{ matrix.name }}
build-target: ${{ inputs.build-target }}
runs-on: ${{ matrix.runner }}
has-gpu: ${{ matrix.has-gpu }}
sha: ${{ inputs.sha }}
test-options: ${{ matrix.options }}
log-name: ${{ matrix.log }}
enabled: ${{ matrix.enabled }}


cleanup:
if: inputs.skip-cleanup == false
needs:
- build
- test
uses:
./.github/workflows/gh-cleanup.yml
with:
Expand Down
65 changes: 65 additions & 0 deletions .github/workflows/gh-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Reusable workflow: runs one cunumeric test configuration inside the
# ghcr.io/nv-legate/cunumeric-<build-target>:<sha> container image.
name: Test cunumeric on GH

on:
  workflow_call:
    inputs:
      # Display name of the test job.
      name:
        type: string
        required: true
      # Build flavour; selects the container image name.
      build-target:
        type: string
        required: true
      # Runner label the test job is scheduled on.
      runs-on:
        type: string
        required: true
      # When true, the job runs nvidia-smi before the tests.
      has-gpu:
        type: boolean
        required: true
        description: "The runner has GPU(s)."
      # Commit SHA; used as the image tag and in the log file name.
      sha:
        type: string
        required: true
      # Arguments handed to `test-cunumeric`.
      test-options:
        type: string
        required: true
      # Short suffix that distinguishes this configuration's log file.
      log-name:
        type: string
        required: true
      # When false, the test job is skipped.
      enabled:
        type: boolean
        required: true

# Single job: runs the requested test command inside the cunumeric image for
# the given build target and commit.
jobs:
test:
name: ${{ inputs.name }}
# Skipped unless the caller enabled this configuration and the repository
# owner is nv-legate.
if: inputs.enabled && github.repository_owner == 'nv-legate'
runs-on: ${{ inputs.runs-on }}
container:
# Start the container as root; the test step switches to `coder` through its
# `shell:` setting.
options: -u root
marcinz marked this conversation as resolved.
Show resolved Hide resolved
image: ghcr.io/nv-legate/cunumeric-${{ inputs.build-target }}:${{ inputs.sha }}
# Map the workspace's test_logs directory to coder's ~/.test_logs so the log
# written inside the container is visible under the workspace.
volumes:
- ${{ github.workspace }}/test_logs:/home/coder/.test_logs
env:
PYTHONDONTWRITEBYTECODE: 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember why do we need this one.

# Forward NVIDIA_VISIBLE_DEVICES from the `env` context into the container.
# NOTE(review): presumably set by the GPU runner — confirm.
NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}

steps:
# Sanity check that only runs when the caller declared a GPU runner.
- if: inputs.has-gpu
name: Run nvidia-smi to make sure GPU is working
run: nvidia-smi

# Runs `test-cunumeric` as user `coder` and tees its output into
# ~/.test_logs. `set -eo pipefail` makes the step fail when test-cunumeric
# fails even though its output is piped through tee; the mkdir/chown lines run
# before it. NOTE(review): confirm that errors in those two lines are meant
# to be non-fatal.
- name: Run cunumeric test / analysis
shell: su coder {0}
run: |
set -x
mkdir -p ~/.test_logs
sudo chown -R coder:coder ~/.test_logs

set -eo pipefail
test-cunumeric ${{ inputs.test-options }} 2>&1 | tee ~/.test_logs/cunumeric-${{ inputs.sha }}-test-${{ inputs.log-name }}.log

- name: Upload logs
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we upload logs? Aren't the logs in the workflow run enough?

# Upload the log even when the test step failed: the test script runs with
# `set -eo pipefail`, so a failing test fails the step, and without this
# condition the upload would be skipped exactly when the log is needed.
if: always()
# v3 of upload-artifact is deprecated and no longer runs on github.com.
# v4 requires artifact names to be unique within a run; the name below varies
# with build-target and log-name.
# NOTE(review): confirm callers pass distinct log-name values, and bump any
# matching download-artifact step to v4 as well.
uses: actions/upload-artifact@v4
with:
  name: "cunumeric-${{ inputs.build-target }}-${{ inputs.sha }}-test-${{ inputs.log-name }}-log"
  # Relative to the workspace; this is the host side of the test_logs volume.
  path: test_logs/cunumeric-${{ inputs.sha }}-test-${{ inputs.log-name }}.log
7 changes: 7 additions & 0 deletions continuous_integration/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,19 @@ ENV AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
# NOTE(review): copying a build ARG into ENV keeps the secret value in the
# image configuration of this stage and of every stage built FROM it (the
# `final` stage below is one). Confirm the value is empty or harmless in
# published images, or switch to a BuildKit secret mount.
ARG AWS_SECRET_ACCESS_KEY
ENV AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}

# Set before the build step below.
# NOTE(review): presumably turns on OpenMP support in build-cunumeric-all — confirm.
ENV USE_OPENMP=ON

COPY --chown=coder:coder .creds /run/secrets

RUN entrypoint build-cunumeric-all

#---------------------------------------------------
# Final stage: starts from the build stage and adds numactl.
FROM stage0 as final
# apt-get needs root; the user is switched back to coder afterwards.
USER root
# NOTE(review): numactl is presumably needed by the NUMA test configuration
# (`--numamem`) — confirm.
RUN export DEBIAN_FRONTEND=noninteractive && \
apt-get update && \
apt-get install -y numactl

USER coder
WORKDIR /home/coder

Expand Down
48 changes: 48 additions & 0 deletions continuous_integration/home/coder/.local/bin/cunumeric-conda-utils
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
. conda-utils

# Create an empty conda environment named by DEFAULT_CONDA_ENV
# (falls back to "legate" when unset or empty).
make_empty_conda_env() {
    local env_name="${DEFAULT_CONDA_ENV:-legate}"

    mamba create -n "${env_name}"
}

# Generate a conda environment YAML file for the requested sections.
#
# Arguments:
#   "$@"  forwarded to `generate-conda-envs.py --sections`.
# Environment:
#   CUDA_VERSION, or CUDA_VERSION_MAJOR / CUDA_VERSION_MINOR as a fallback.
#   PYTHON_VERSION, falling back to the version reported by `python3`.
#   DEFAULT_CONDA_ENV (default "legate"): name substituted for "legate-test".
# Outputs:
#   Sets `yaml_file` to the generated path. The variable is deliberately not
#   declared `local` here so that a caller which declared it can read it.
#   Prints the file and copies it to /tmp/out.
# Returns:
#   Non-zero when the generator did not produce a regular file.
generate_conda_env_yaml_file_for_test() {
    local cuda_version="${CUDA_VERSION:-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}}";
    # Drop the third dot-separated field (e.g. 11.8.0 -> 11.8).
    cuda_version="$(echo "${cuda_version}" | cut -d'.' -f3 --complement)";

    local python_version="${PYTHON_VERSION:-}";

    if [ -z "${python_version}" ]; then
        python_version="$(python3 --version 2>&1 | cut -d' ' -f2 | cut -d'.' -f3 --complement)";
    fi

    # The generator's first output line carries the file name in its third
    # space-separated field; the file is expected under the home directory.
    yaml_file=~/"$( \
        ~/legate/scripts/generate-conda-envs.py \
            --os linux \
            --compilers \
            --ctk "${cuda_version}" \
            --python "${python_version}" \
            --openmpi \
            --no-ucx \
            --sections "$@" \
        | head -n1 | cut -d' ' -f3 \
    )"

    # Without this check a generator failure leaves yaml_file pointing at the
    # home directory and the commands below fail with misleading errors.
    if [ ! -f "${yaml_file}" ]; then
        echo "generate-conda-envs.py did not produce a YAML file (got: ${yaml_file})" >&2;
        return 1;
    fi

    sed -i -re "s/legate-test/${DEFAULT_CONDA_ENV:-legate}/g" "${yaml_file}";

    # "$*" joins the arguments into one word; "$@" inside a string would
    # split the message into several words.
    echo "\"$*\" YAML file: ${yaml_file}"
    cat "${yaml_file}"

    mkdir -p /tmp/out
    cp "${yaml_file}" /tmp/out
}

# Generate the environment YAML for the given sections ("$@") and apply it to
# the conda environment named by DEFAULT_CONDA_ENV (default "legate").
update_conda_env_using_section() {
    # The generator assigns the resulting path to `yaml_file`; declaring it
    # local here keeps that value scoped to this call.
    local yaml_file=

    generate_conda_env_yaml_file_for_test "$@"

    local target_env="${DEFAULT_CONDA_ENV:-legate}"
    mamba env update -n "${target_env}" -f "${yaml_file}"
}

# Install the cunumeric package into the conda environment named by
# DEFAULT_CONDA_ENV (default "legate"), using the nvidia and conda-forge
# channels plus the two local conda-build output directories.
install_cunumeric() {
    local env_name="${DEFAULT_CONDA_ENV:-legate}"

    mamba install -y -n "${env_name}" \
        -c nvidia \
        -c conda-forge \
        -c /tmp/conda-build/cunumeric \
        -c /tmp/conda-build/legate_core \
        cunumeric
}
Loading
Loading