Skip to content

Commit

Permalink
hsum_float_16
Browse files Browse the repository at this point in the history
Signed-off-by: liqunfu <[email protected]>
  • Loading branch information
liqunfu committed Jul 29, 2024
1 parent 705aa1f commit 012e9c4
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 18 deletions.
18 changes: 0 additions & 18 deletions onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx512_int8_blklen32.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,6 @@
#include "sqnbitgemm_kernel_avx2_int8_blklen32.h"
#include "sqnbitgemm_kernel_avx512_int8_blklen64.h"

// Folds a 512-bit vector of 16 floats down to 8 floats by adding its upper
// 256-bit half to its lower 256-bit half, lane by lane.
static MLAS_FORCEINLINE __m256
h_add_512(__m512 a)
{
    // The cast reinterprets the low 256 bits; the extract with index 1 pulls the high 256 bits.
    const __m256 lower_half = _mm512_castps512_ps256(a);
    const __m256 upper_half = _mm512_extractf32x8_ps(a, 1);
    return _mm256_add_ps(lower_half, upper_half);
}

// Returns the horizontal sum of all 16 single-precision lanes of x.
// The reduction halves the lane count at every step: 16 -> 8 -> 4 -> 2 -> 1.
static MLAS_FORCEINLINE float
hsum_float_16(const __m512 x)
{
    // 16 -> 8 lanes.
    const __m256 sum8 = h_add_512(x);

    // 8 -> 4 lanes: upper 128 bits plus lower 128 bits.
    const __m128 upper4 = _mm256_extractf128_ps(sum8, 1);
    const __m128 lower4 = _mm256_castps256_ps128(sum8);
    const __m128 sum4 = _mm_add_ps(upper4, lower4);

    // 4 -> 2 lanes: movehl places lanes 2 and 3 over lanes 0 and 1.
    const __m128 sum2 = _mm_add_ps(sum4, _mm_movehl_ps(sum4, sum4));

    // 2 -> 1 lane: movehdup copies lane 1 into lane 0, add_ss adds lane 0 only.
    const __m128 sum1 = _mm_add_ss(sum2, _mm_movehdup_ps(sum2));

    return _mm_cvtss_f32(sum1);
}

static MLAS_FORCEINLINE void
load_4blk_4b_packed_blklen32(const std::byte* QuantBDataPtr, __m512i& bv0_64_epi8, __m512i& bv1_64_epi8)
{
Expand Down
18 changes: 18 additions & 0 deletions onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx512_int8_blklen64.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@
#include "sqnbitgemm.h"
#include "sqnbitgemm_kernel_avx_common.h"

// Folds a 512-bit vector of 16 floats down to 8 floats by adding its upper
// 256-bit half to its lower 256-bit half, lane by lane.
static MLAS_FORCEINLINE __m256
h_add_512(__m512 a)
{
    // The cast reinterprets the low 256 bits; the extract with index 1 pulls the high 256 bits.
    const __m256 lower_half = _mm512_castps512_ps256(a);
    const __m256 upper_half = _mm512_extractf32x8_ps(a, 1);
    return _mm256_add_ps(lower_half, upper_half);
}

// Returns the horizontal sum of all 16 single-precision lanes of x.
// The reduction halves the lane count at every step: 16 -> 8 -> 4 -> 2 -> 1.
static MLAS_FORCEINLINE float
hsum_float_16(const __m512 x)
{
    // 16 -> 8 lanes.
    const __m256 sum8 = h_add_512(x);

    // 8 -> 4 lanes: upper 128 bits plus lower 128 bits.
    const __m128 upper4 = _mm256_extractf128_ps(sum8, 1);
    const __m128 lower4 = _mm256_castps256_ps128(sum8);
    const __m128 sum4 = _mm_add_ps(upper4, lower4);

    // 4 -> 2 lanes: movehl places lanes 2 and 3 over lanes 0 and 1.
    const __m128 sum2 = _mm_add_ps(sum4, _mm_movehl_ps(sum4, sum4));

    // 2 -> 1 lane: movehdup copies lane 1 into lane 0, add_ss adds lane 0 only.
    const __m128 sum1 = _mm_add_ss(sum2, _mm_movehdup_ps(sum2));

    return _mm_cvtss_f32(sum1);
}

static MLAS_FORCEINLINE __m512i
combine_two_m256i_to_m512i(const __m256i& a, const __m256i& b)
{
Expand Down

0 comments on commit 012e9c4

Please sign in to comment.