diff --git a/.github/workflows/linux-gpu-x64-build.yml b/.github/workflows/linux-gpu-x64-build.yml
index 6699fc7ef..4bde267a5 100644
--- a/.github/workflows/linux-gpu-x64-build.yml
+++ b/.github/workflows/linux-gpu-x64-build.yml
@@ -1,5 +1,5 @@
 name: "Linux GPU x64 Build"
-on: [ workflow_dispatch ]
+on: [ workflow_dispatch, pull_request ]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -16,8 +16,6 @@ jobs:
     - name: Checkout OnnxRuntime GenAI repo
       uses: actions/checkout@v2
 
-
-
     - name: Download OnnxRuntime
       run: |
         curl -L -o ${{ env.ort_zip }} ${{ env.ort_url }}
@@ -35,7 +33,7 @@ jobs:
       run: |
         git submodule update --init --recursive
 
-    - name: Build with CMake and GCC
+    - name: Download Docker Image
      run: |
        set -e -x
        python3 tools/ci_build/get_docker_image.py --dockerfile tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile \
@@ -43,8 +41,19 @@ jobs:
          --docker-build-args "--build-arg BUILD_UID=$( id -u )" \
          --container-registry onnxruntimebuildcache \
          --repository onnxruntimegpubuild
-        docker run --rm --volume $GITHUB_WORKSPACE:/onnxruntime_src --volume $RUNNER_TEMP:/build -w /build onnxruntimegpubuild bash -c "echo $PATH && /usr/bin/cmake -G Ninja /onnxruntime_src -DBUILD_SHARED_LIBS=ON -DUSE_CUDA=ON -DCMAKE_BUILD_TYPE=Release && ninja"
-
+    - name: Print Docker Image Environment Variables
+      run: |
+        echo "Printing docker image environment variables"
+        docker run --rm onnxruntimegpubuild env
+    - name: Build with CMake in Docker
+      run: |
+        echo "Running docker image onnxruntimegpubuild"
+        docker run \
+          --gpus all \
+          --rm \
+          --volume $GITHUB_WORKSPACE:/onnxruntime_src \
+          --volume $RUNNER_TEMP:/build \
+          -w /build onnxruntimegpubuild bash -c "/usr/bin/cmake -G Ninja /onnxruntime_src -DBUILD_SHARED_LIBS=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CUDA_ARCHITECTURES=75 -DUSE_CUDA=1 -DCMAKE_BUILD_TYPE=Release && ninja"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8602c3b8..26a90c341 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,6 +14,7 @@ include(CheckLanguage)
 check_language(CUDA)
 if (CMAKE_CUDA_COMPILER)
   message(STATUS "CUDA found CUDAToolkit_VERSION ${CUDAToolkit_VERSION} with CMAKE_CUDA_COMPILER ${CMAKE_CUDA_COMPILER}")
+  set(CMAKE_CUDA_RUNTIME_LIBRARY Shared)
 else()
   message(STATUS "CUDA not found")
 endif()
@@ -67,7 +68,6 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER)
   message(STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}")
   # set(CUDA_PROPAGATE_HOST_FLAGS ON)
   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe --diag_suppress=2803 --expt-relaxed-constexpr")
-
   file(GLOB generator_cuda_srcs CONFIGURE_DEPENDS
     "${GENERATORS_ROOT}/*.cu"
     "${GENERATORS_ROOT}/*.cuh"
@@ -75,9 +75,7 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER)
     "${MODELS_ROOT}/*.cuh"
   )
   list(APPEND generator_srcs ${generator_cuda_srcs})
-  add_compile_definitions(USE_CUDA=1)
-  include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
 else()
   file(GLOB generator_cuda_srcs
     "${GENERATORS_ROOT}/*_cuda*.*")
@@ -120,6 +118,15 @@ target_include_directories(python PRIVATE ${CMAKE_SOURCE_DIR}/ort/include)
 target_link_directories(python PRIVATE ${CMAKE_SOURCE_DIR}/ort/lib)
 target_link_libraries(python PRIVATE onnxruntime-genai ${ONNXRUNTIME_LIB})
 set_target_properties(python PROPERTIES OUTPUT_NAME "onnxruntime_genai")
+if(USE_CUDA AND CMAKE_CUDA_COMPILER)
+  set_target_properties(onnxruntime-genai PROPERTIES LINKER_LANGUAGE CUDA)
+  set_target_properties(python PROPERTIES LINKER_LANGUAGE CUDA)
+  set_target_properties(Tests PROPERTIES LINKER_LANGUAGE CUDA)
+  target_link_libraries(onnxruntime-genai PRIVATE cublasLt cublas cudnn curand cufft cudart)
+  target_link_libraries(Tests PRIVATE cublasLt cublas cudnn curand cufft cudart)
+  target_link_libraries(python PRIVATE cublasLt cublas cudnn curand cufft cudart)
+endif()
+
 # Visual C++ - /MP Multiple process build, /WX Warnings as errors
 # Enable:
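Note on the CMakeLists.txt change above: with CMAKE_CUDA_RUNTIME_LIBRARY set to Shared and the CUDA math libraries linked by name, the produced binaries should depend on libcudart.so and friends dynamically rather than embedding the static runtime. A minimal sanity check, run inside the build container after ninja finishes (the /build path comes from the workflow's volume mount; the library file name is an assumption derived from the onnxruntime-genai target name):

    # Expect dynamic dependencies on the CUDA runtime and math libraries,
    # not a statically embedded cudart.
    ldd /build/libonnxruntime-genai.so | grep -E 'cudart|cublas|cudnn|curand|cufft'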
diff --git a/tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile
index 086aafd60..2ff6c38f5 100644
--- a/tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile
+++ b/tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile
@@ -5,10 +5,13 @@ ARG BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubi8
 FROM $BASEIMAGE
 
-ENV PATH /opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+ENV PATH="/usr/local/cuda/bin:/opt/rh/gcc-toolset-12/root/usr/bin:${PATH}"
 ENV LANG=en_US.UTF-8
 ENV LC_ALL=en_US.UTF-8
-
+ENV CUDA_HOME=/usr/local/cuda
+ENV CUDNN_HOME=/usr/lib/x86_64-linux-gnu/
+ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
+ENV CUDAHOSTCXX=/opt/rh/gcc-toolset-12/root/usr/bin/g++
 
 ADD scripts /tmp/scripts
 RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts
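Two checks worth running against the Dockerfile change above. First, CUDNN_HOME points at /usr/lib/x86_64-linux-gnu/, which is a Debian/Ubuntu library layout, while the base image is UBI 8 (Red Hat), where cuDNN typically lives under /usr/lib64; the path should be verified inside the image. Second, the PATH and CUDAHOSTCXX wiring can be confirmed with a quick local build (the genai-gpu-env-check tag is hypothetical, and the build assumes the scripts/ directory the Dockerfile ADDs sits next to it in the repo):

    # Build the image locally, then confirm nvcc resolves from /usr/local/cuda/bin,
    # CUDAHOSTCXX points at a working gcc-toolset-12 g++, and CUDNN_HOME exists.
    docker build -t genai-gpu-env-check tools/ci_build/github/linux/docker/inference/x64/default/gpu
    docker run --rm genai-gpu-env-check bash -c 'which nvcc && nvcc --version && "$CUDAHOSTCXX" --version && ls "$CUDNN_HOME"'

Neither command needs a GPU attached, so they also work on a plain development machine before pushing to CI.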