Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding --gpus all to the docker #38

Merged
merged 19 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions .github/workflows/linux-gpu-x64-build.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: "Linux GPU x64 Build"
on: [ workflow_dispatch ]
on: [ workflow_dispatch, pull_request ]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
Expand All @@ -16,8 +16,6 @@ jobs:
- name: Checkout OnnxRuntime GenAI repo
uses: actions/checkout@v2



- name: Download OnnxRuntime
run: |
curl -L -o ${{ env.ort_zip }} ${{ env.ort_url }}
Expand All @@ -35,16 +33,27 @@ jobs:
run: |
git submodule update --init --recursive

- name: Build with CMake and GCC
- name: Download Docker Image
run: |
set -e -x
python3 tools/ci_build/get_docker_image.py --dockerfile tools/ci_build/github/linux/docker/inference/x64/default/gpu/Dockerfile \
--context tools/ci_build/github/linux/docker/inference/x64/default/gpu \
--docker-build-args "--build-arg BUILD_UID=$( id -u )" \
--container-registry onnxruntimebuildcache \
--repository onnxruntimegpubuild
docker run --rm --volume $GITHUB_WORKSPACE:/onnxruntime_src --volume $RUNNER_TEMP:/build -w /build onnxruntimegpubuild bash -c "echo $PATH && /usr/bin/cmake -G Ninja /onnxruntime_src -DBUILD_SHARED_LIBS=ON -DUSE_CUDA=ON -DCMAKE_BUILD_TYPE=Release && ninja"


- name: Print Docker Image Environment Variables
run: |
echo "Printing docker image environment variables"
docker run --rm onnxruntimegpubuild env

- name: Build with CMake in Docker
run: |
echo "Running docker image onnxruntimegpubuild"
docker run \
--gpus all \
--rm \
--volume $GITHUB_WORKSPACE:/onnxruntime_src \
--volume $RUNNER_TEMP:/build \
-w /build onnxruntimegpubuild bash -c "/usr/bin/cmake -G Ninja /onnxruntime_src -DBUILD_SHARED_LIBS=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CUDA_ARCHITECTURES=75 -DUSE_CUDA=1 -DCMAKE_BUILD_TYPE=Release && ninja"

13 changes: 10 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
message(STATUS "CUDA found CUDAToolkit_VERSION ${CUDAToolkit_VERSION} with CMAKE_CUDA_COMPILER ${CMAKE_CUDA_COMPILER}" )
set(CMAKE_CUDA_RUNTIME_LIBRARY Shared)
else()
message(STATUS "CUDA not found")
endif()
Expand Down Expand Up @@ -67,17 +68,14 @@ if(USE_CUDA AND CMAKE_CUDA_COMPILER)
message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}")
# set(CUDA_PROPAGATE_HOST_FLAGS ON)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe --diag_suppress=2803 --expt-relaxed-constexpr")

file(GLOB generator_cuda_srcs CONFIGURE_DEPENDS
"${GENERATORS_ROOT}/*.cu"
"${GENERATORS_ROOT}/*.cuh"
"${MODELS_ROOT}/*.cu"
"${MODELS_ROOT}/*.cuh"
)
list(APPEND generator_srcs ${generator_cuda_srcs})

add_compile_definitions(USE_CUDA=1)

include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
else()
file(GLOB generator_cuda_srcs "${GENERATORS_ROOT}/*_cuda*.*")
Expand Down Expand Up @@ -120,6 +118,15 @@ target_include_directories(python PRIVATE ${CMAKE_SOURCE_DIR}/ort/include)
target_link_directories(python PRIVATE ${CMAKE_SOURCE_DIR}/ort/lib)
target_link_libraries(python PRIVATE onnxruntime-genai ${ONNXRUNTIME_LIB})
set_target_properties(python PROPERTIES OUTPUT_NAME "onnxruntime_genai")
if(USE_CUDA AND CMAKE_CUDA_COMPILER)
set_target_properties(onnxruntime-genai PROPERTIES LINKER_LANGUAGE CUDA)
set_target_properties(python PROPERTIES LINKER_LANGUAGE CUDA)
set_target_properties(Tests PROPERTIES LINKER_LANGUAGE CUDA)
target_link_libraries(onnxruntime-genai PRIVATE cublasLt cublas cudnn curand cufft cudart)
target_link_libraries(Tests PRIVATE cublasLt cublas cudnn curand cufft cudart)
target_link_libraries(python PRIVATE cublasLt cublas cudnn curand cufft cudart)
endif()


# Visual C++ - /MP Multiple process build, /WX Warnings as errors
# Enable:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
ARG BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubi8
FROM $BASEIMAGE

ENV PATH /opt/rh/gcc-toolset-12/root/usr/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV PATH="/usr/local/cuda/bin:/opt/rh/gcc-toolset-12/root/usr/bin:${PATH}"
ENV LANG=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8

ENV CUDA_HOME=/usr/local/cuda
ENV CUDNN_HOME=/usr/lib/x86_64-linux-gnu/
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV CUDAHOSTCXX=/opt/rh/gcc-toolset-12/root/usr/bin/g++
ADD scripts /tmp/scripts
RUN cd /tmp/scripts && /tmp/scripts/install_centos.sh && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts

Expand Down
Loading