From 2ad56898da586603aae076e7f0ec8b486ae428de Mon Sep 17 00:00:00 2001 From: Jambay Kinley Date: Mon, 23 Oct 2023 22:27:48 +0000 Subject: [PATCH] template spacing --- .../quantization/dequantize_blockwise_bnb4.cu | 14 +++++------ .../cuda/quantization/matmul_bnb4.cu | 24 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cu b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cu index e1236d4119c6d..12c956fee7488 100644 --- a/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cu +++ b/onnxruntime/contrib_ops/cuda/quantization/dequantize_blockwise_bnb4.cu @@ -111,13 +111,13 @@ template Status DequantizeBnb4( cudaStream_t stream); template Status DequantizeBnb4( - const half* quant_map, - half *output, - const uint8_t *quant_data, - const half *absmax, - int block_size, - int numel, - cudaStream_t stream); + const half* quant_map, + half *output, + const uint8_t *quant_data, + const half *absmax, + int block_size, + int numel, + cudaStream_t stream); } // namespace cuda } // namespace contrib diff --git a/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cu b/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cu index ce5288948c5d4..a5eeed82e2bb4 100644 --- a/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cu +++ b/onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cu @@ -13,18 +13,18 @@ namespace cuda { #define num_values_4bit 32 template __global__ void kgemm_4bit_inference_naive( - int M, - int N, - int K, - const T* __restrict__ A, - const uint8_t *B, - const T *absmax, - const T *datatype, - T * out, - int lda, - int ldb, - int ldc, - int block_size) { + int M, + int N, + int K, + const T* __restrict__ A, + const uint8_t *B, + const T *absmax, + const T *datatype, + T * out, + int lda, + int ldb, + int ldc, + int block_size) { // per threadblock: // load step-by-step in chunks of [32,warps]: 1x32 * [32,warps] -> [1,warps] // 4 warps -> 4 loads per iter