Update main post 24.06 #135

Merged: 3 commits merged on Jun 28, 2024
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -229,6 +229,8 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cuda_linalg.so libtorch_cuda_linalg.so
COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_global_deps.so libtorch_global_deps.so
COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libcaffe2_nvrtc.so libcaffe2_nvrtc.so
# TODO: Revisit when not needed by making it part of cuda base container.
COMMAND docker cp -L pytorch_backend_ptlib:/usr/local/cuda/lib64/libcusparseLt.so libcusparseLt.so
COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/libtorchvision.so libtorchvision.so
COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHTRT} = 'ON' ]; then docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/lib/libtorchtrt_runtime.so libtorchtrt_runtime.so; fi"
COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/bin/torchtrtc torchtrtc || echo "error ignored..." || true
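The `-L` flag tells `docker cp` to follow symlinks inside the source container, so the copy above should produce the real libcusparseLt.so shared object rather than a dangling link (the file under /usr/local/cuda/lib64 is presumably a symlink to a versioned library). A rough spot check after this build step, run from the directory the COMMANDs above copy into, is sketched below; it is only a sanity check, not part of the build:

# Sketch only: confirm the dereferenced copy is a regular shared object, not a symlink.
file libcusparseLt.so    # expect "ELF 64-bit LSB shared object", not "symbolic link"
ls -l libcusparseLt.so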
@@ -434,6 +436,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
install(
FILES
${PT_LIB_PATHS}
${CMAKE_CURRENT_BINARY_DIR}/libcusparseLt.so
${CMAKE_CURRENT_BINARY_DIR}/LICENSE.pytorch
DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/pytorch
)
@@ -474,6 +477,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
COMMAND ln -sf libopencv_flann.so libopencv_flann.so.${OPENCV_VERSION}
COMMAND ln -sf libpng16.so libpng16.so.16
COMMAND ln -sf libjpeg.so libjpeg.so.8
COMMAND ln -sf libcusparseLt.so libcusparseLt.so.0
RESULT_VARIABLE LINK_STATUS
WORKING_DIRECTORY ${CMAKE_INSTALL_PREFIX}/backends/pytorch)
if(LINK_STATUS AND NOT LINK_STATUS EQUAL 0)
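Taken together, the three hunks copy libcusparseLt.so out of the PyTorch build container, install it into the PyTorch backend directory, and give it the versioned libcusparseLt.so.0 name via a symlink. A hedged way to sanity-check the installed layout is sketched below; the install prefix is only an assumption and depends on the CMAKE_INSTALL_PREFIX used for the build:

# Sketch, assuming the default Triton layout under /opt/tritonserver.
cd /opt/tritonserver/backends/pytorch
ls -l libcusparseLt.so libcusparseLt.so.0           # .so.0 should be a symlink to libcusparseLt.so
ldd libtorch_cuda.so | grep -i cusparselt || true   # shows whether the soname resolves locally, if Torch links it dynamically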
4 changes: 2 additions & 2 deletions README.md
@@ -146,11 +146,11 @@ key: "INFERENCE_MODE"

* `DISABLE_CUDNN`: Boolean flag to disable the cuDNN library. By default, cuDNN is enabled.

[cuDNN](https://developer.nvidia.com/cudnn) is a GPU-accelerated library of primitives for
deep neural networks. cuDNN provides highly tuned implementations for standard routines.

Typically, models run with cuDNN enabled are faster. However there are some exceptions
where using cuDNN can be slower, cause higher memory usage or result in errors.


The section of model config file specifying this parameter will look like:
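The concrete example referenced here falls outside the hunk shown above. Based on the `key: "INFERENCE_MODE"` block visible in the hunk header, a sketch of what such a parameters entry typically looks like for this backend is given below; the "true" value is only illustrative:

parameters: {
  key: "DISABLE_CUDNN"
  value: {
    string_value: "true"
  }
}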