diff --git a/bestla/bestla/bestla_prologue_b.h b/bestla/bestla/bestla_prologue_b.h index 17eaf8bfd..136cfa35b 100644 --- a/bestla/bestla/bestla_prologue_b.h +++ b/bestla/bestla/bestla_prologue_b.h @@ -628,6 +628,8 @@ class WeightKBlockNInteger { if (qtype == BTLA_DTYPE::S4_CLIP) return compressBit4Weight(N, K, B, dstptr, qtype, threading); if (qtype == BTLA_DTYPE::S3_CLIP) return compressBit3Weight(N, K, B, dstptr, qtype, threading); if (qtype == BTLA_DTYPE::S2_CLIP) return compressBit2Weight(N, K, B, dstptr, qtype, threading); + if (qtype == BTLA_DTYPE::F4_BNB || qtype == BTLA_DTYPE::F4_NF4 || qtype == BTLA_DTYPE::F4_E2M1) + return compressBit4Weight(N, K, B, dstptr, qtype, threading); } template diff --git a/bestla/bestla/ut/bestla_prologue_b.cpp b/bestla/bestla/ut/bestla_prologue_b.cpp index 6f2c44491..f30422a5d 100644 --- a/bestla/bestla/ut/bestla_prologue_b.cpp +++ b/bestla/bestla/ut/bestla_prologue_b.cpp @@ -411,7 +411,7 @@ class UT_TransposeBlockQuantize_F4 { } }; #ifdef BTLA_UT_PROLOGUE_B -static UT_TransposeBlockQuantize_F4 sUT_TransposeBlockQuantize_F4; +static UT_TransposeBlockQuantize_F4 sUT_TransposeBlockQuantize_F4; #endif class UT_BlockQuantize_INT4 {