Skip to content

Commit

Permalink
template spacing
Browse files (browse the repository at this point in the history)
  • Loading branch information
jambayk committed Oct 23, 2023
1 parent 7f1d345 commit 2ad5689
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,13 @@ template Status DequantizeBnb4<float>(
cudaStream_t stream);

template Status DequantizeBnb4<half>(
const half* quant_map,
half *output,
const uint8_t *quant_data,
const half *absmax,
int block_size,
int numel,
cudaStream_t stream);
const half* quant_map,
half *output,
const uint8_t *quant_data,
const half *absmax,
int block_size,
int numel,
cudaStream_t stream);

} // namespace cuda
} // namespace contrib
Expand Down
24 changes: 12 additions & 12 deletions onnxruntime/contrib_ops/cuda/quantization/matmul_bnb4.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@ namespace cuda {
#define num_values_4bit 32
template <typename T, int THREADS, int BITS>
__global__ void kgemm_4bit_inference_naive(
int M,
int N,
int K,
const T* __restrict__ A,
const uint8_t *B,
const T *absmax,
const T *datatype,
T * out,
int lda,
int ldb,
int ldc,
int block_size) {
int M,
int N,
int K,
const T* __restrict__ A,
const uint8_t *B,
const T *absmax,
const T *datatype,
T * out,
int lda,
int ldb,
int ldc,
int block_size) {
// per threadblock:
// load step-by-step in chunks of [32,warps]: 1x32 * [32,warps] -> [1,warps]
// 4 warps -> 4 loads per iter
Expand Down

0 comments on commit 2ad5689

Please sign in to comment.