Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic CUDA support #68

Merged
merged 17 commits into from
Oct 6, 2023
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ jobs:

- name: Pytest
run: |
pytest tests/ --cov
pytest tests/ --cov -k "not cuda"
91 changes: 91 additions & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Jenkins declarative pipeline: builds finufft from source with CUDA enabled
// inside a manylinux-based CUDA 11.8 image, then runs the GPU-marked pytest
// suite on a V100 node. Sends an email to the dev list on failure.
pipeline {
    agent none
    options {
        disableConcurrentBuilds()
        // Keep at most 8 builds / 20 days of history to bound disk usage.
        buildDiscarder(logRotator(numToKeepStr: '8', daysToKeepStr: '20'))
        timeout(time: 1, unit: 'HOURS')
    }
    stages {
        stage('CUDA Tests') {
            agent {
                dockerfile {
                    filename 'ci/docker/Dockerfile-cuda11.8'
                    args '--gpus 2'
                    // Only schedule on Docker-capable nodes with V100 GPUs.
                    label 'docker && v100'
                }
            }
            environment {
                // Redirect HOME into the workspace so pip caches and the venv
                // stay node-local and are cleaned with the workspace.
                HOME = "$WORKSPACE"
                // CPython 3.9 from the manylinux image.
                PYBIN = "/opt/python/cp39-cp39/bin"
                // Let the build and tests find the freshly built finufft libs.
                LIBRARY_PATH = "$WORKSPACE/finufft/build"
                LD_LIBRARY_PATH = "$WORKSPACE/finufft/build"
            }
            steps {

                // TODO - reconsider install strategy once finufft/cufinufft 2.2 is released
                // Shallow, tag-less clone of finufft master into ./finufft.
                checkout scmGit(branches: [[name: '*/master']],
                                extensions: [cloneOption(noTags: true, reference: '', shallow: true),
                                             [$class: 'RelativeTargetDirectory', relativeTargetDir: 'finufft'],
                                             cleanAfterCheckout()],
                                userRemoteConfigs: [[url: 'https://github.com/flatironinstitute/finufft']])

                // Sanity check: confirm the GPUs are visible inside the container.
                sh '''#!/bin/bash -ex
                    nvidia-smi
                '''
                sh '''#!/bin/bash -ex
                    echo $HOME
                    ls
                '''
                // Configure and build finufft with CUDA only (CPU off).
                sh '''#!/bin/bash -ex
                    cd finufft
                    # v100 cuda arch
                    cuda_arch="70"

                    cmake -B build . -DFINUFFT_USE_CUDA=ON \
                                     -DFINUFFT_USE_CPU=OFF \
                                     -DFINUFFT_BUILD_TESTS=OFF \
                                     -DCMAKE_CUDA_ARCHITECTURES="$cuda_arch" \
                                     -DBUILD_TESTING=ON
                    cd build
                    make -j4
                '''

                // Create a venv at $HOME (the workspace) for the test deps.
                sh '${PYBIN}/python3 -m venv $HOME'
                sh '''#!/bin/bash -ex
                    source $HOME/bin/activate
                    python3 -m pip install --upgrade pip
                    # we could also move pytorch install inside docker
                    python3 -m pip install "torch~=2.1.0" --index-url https://download.pytorch.org/whl/cu118
                    python3 -m pip install finufft/python/cufinufft

                    python3 -m pip install -e .[dev]

                    python3 -m pytest -k "cuda" tests/ --cov -v
                '''
            }
        }
    }
    post {
        failure {
            // Notify committers and the dev list with a log excerpt on failure.
            emailext subject: '$PROJECT_NAME - Build #$BUILD_NUMBER - $BUILD_STATUS',
                     body: '''$PROJECT_NAME - Build #$BUILD_NUMBER - $BUILD_STATUS

Check console output at $BUILD_URL to view full results.

Building $BRANCH_NAME for $CAUSE
$JOB_DESCRIPTION

Changes:
$CHANGES

End of build log:
${BUILD_LOG,maxLines=200}
''',
                     recipientProviders: [
                         [$class: 'DevelopersRecipientProvider'],
                     ],
                     replyTo: '$DEFAULT_REPLYTO',
                     to: '[email protected]'
        }
    }
}
58 changes: 58 additions & 0 deletions ci/docker/Dockerfile-cuda11.8
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Based on https://github.com/flatironinstitute/finufft/blob/master/tools/cufinufft/docker/cuda11.2/Dockerfile-x86_64
#
# manylinux2014 image with the CUDA 11.8 toolkit and devtoolset-9 compilers,
# used by the Jenkins CUDA test stage.

FROM quay.io/pypa/manylinux2014_x86_64
# NOTE: the legacy space-separated `LABEL key value` / `ENV key value` forms
# are deprecated; the `key=value` form below is equivalent and warning-free.
LABEL maintainer="Brian Ward"

ENV CUDA_MAJOR=11
ENV CUDA_MINOR=8
# e.g. "11-8" (yum package suffix) and "11.8" (install dir suffix).
ENV CUDA_DASH_VERSION=${CUDA_MAJOR}-${CUDA_MINOR}
ENV CUDA_DOT_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}

# ---- The following block adds layers for CUDA --- #
# base: import and verify NVIDIA's RPM signing key before enabling the repo.
RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
    echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -

COPY ci/docker/cuda.repo /etc/yum.repos.d/cuda.repo

# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
RUN yum install -y \
        cuda-cudart-${CUDA_DASH_VERSION} \
        cuda-compat-${CUDA_DASH_VERSION} && \
    ln -s cuda-${CUDA_DOT_VERSION} /usr/local/cuda

# nvidia-docker 1.0: make driver libraries mounted at runtime resolvable.
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64

# nvidia-container-runtime: request all GPUs with compute capability.
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_DOT_VERSION} brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441"

# runtime: full set of CUDA runtime + development packages and nvcc.
RUN yum install -y \
        cuda-libraries-${CUDA_DASH_VERSION} \
        cuda-nvtx-${CUDA_DASH_VERSION} \
        cuda-cudart-devel-${CUDA_DASH_VERSION} \
        cuda-libraries-devel-${CUDA_DASH_VERSION} \
        cuda-nvprof-${CUDA_DASH_VERSION} \
        cuda-nvcc-${CUDA_DASH_VERSION}

# Link against the driver API stubs at build time (no GPU needed to link).
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs

# /CUDA #

# CUDA 11 doesn't work on gcc/g++ newer than v9
RUN yum install -y \
        devtoolset-9-gcc \
        devtoolset-9-gcc-c++ \
        cmake && \
    rm -rf /var/cache/yum/*

ENV PATH=/opt/rh/devtoolset-9/root/usr/bin:${PATH}
6 changes: 6 additions & 0 deletions ci/docker/cuda.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Yum repository definition for NVIDIA's CUDA packages (RHEL 8 / x86_64),
# copied into /etc/yum.repos.d/ by the CUDA Dockerfile.
[cuda]
name=cuda
baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64
enabled=1
# Require GPG verification against NVIDIA's key, which the Dockerfile
# downloads and checksum-verifies into /etc/pki/rpm-gpg/ before any install.
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
Loading
Loading