Skip to content

Commit

Permalink
Apply the (__GNUC__ > 10) guard change to 2 files that were missed earlier
Browse files (browse the repository at this point in the history)
Signed-off-by: liqunfu <[email protected]>
  • Loading branch information
liqunfu committed Jul 30, 2024
1 parent 1fb1c83 commit 85918e9
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
16 changes: 8 additions & 8 deletions onnxruntime/core/mlas/lib/sqnbitgemm_kernel_avx2_int8_blklen32.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ accumulate_blklen32_r2c1blk2_avx2(
// const __m256i bv1_32_epi8 = _mm256_and_si256(_mm256_srli_epi16(bv_packed, 4), low_mask);
__m256i bv1_32_epi8 = _mm256_srli_epi16(_mm256_sub_epi8(bv_packed, bv0_32_epi8), 4); // 32~63

-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
if constexpr (vnni) {
__m256 scale_b_2_ps = _mm256_castpd_ps(_mm256_broadcast_sd((double*)scale_b));
{
Expand Down Expand Up @@ -108,7 +108,7 @@ accumulate_blklen32_r2c1blk2_avx2(
__m256 scale_8_ps_ = _mm256_permute_ps(_mm256_mul_ps(scale_a1_2_ps, scale_b_2_ps), _MM_SHUFFLE(1, 1, 0, 0));
acc1 = _mm256_fmadd_ps(sum_ps_, scale_8_ps_, acc1);
//}
-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
}
#endif
}
Expand All @@ -130,7 +130,7 @@ accumulate_blklen32_r1c1blk2_avx2(
__m256i bv0_32_epi8 = _mm256_and_si256(bv_packed, low_mask); // 0~31
__m256i bv1_32_epi8 = _mm256_srli_epi16(_mm256_sub_epi8(bv_packed, bv0_32_epi8), 4); // 32~63

-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
if constexpr (vnni) {
const __m256i dot0_8_epi32 = _mm256_dpbusds_avx_epi32(_mm256_setzero_si256(), bv0_32_epi8, av00_32_epi8);
const __m256i dot1_8_epi32 = _mm256_dpbusds_avx_epi32(_mm256_setzero_si256(), bv1_32_epi8, av01_32_epi8);
Expand Down Expand Up @@ -158,7 +158,7 @@ accumulate_blklen32_r1c1blk2_avx2(
// 1 0 1 0 1 0 1 0 -> 1 1 0 0 1 1 0 0
__m256 scale_8_ps = _mm256_permute_ps(_mm256_mul_ps(scale_a0_2_ps, scale_b_2_ps), _MM_SHUFFLE(1, 1, 0, 0));
acc0 = _mm256_fmadd_ps(sum_ps, scale_8_ps, acc0);
-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
}
#endif
}
Expand All @@ -180,7 +180,7 @@ accumulate_blklen32_r2c1blk1_avx2(
__m256i bv_32_epi8 = _mm256_set_m128i(_mm_srli_epi16(bv_packed0, 4), bv_packed0);
bv_32_epi8 = _mm256_and_si256(_mm256_set1_epi8(0x0F), bv_32_epi8);

-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
if constexpr (vnni) {
accumulate_1blk_dot_vnni(av00_32_epi8, bv_32_epi8, combined_scale00, acc0);
accumulate_1blk_dot_vnni(av10_32_epi8, bv_32_epi8, combined_scale10, acc1);
Expand All @@ -189,7 +189,7 @@ accumulate_blklen32_r2c1blk1_avx2(
__m256i one_16_epi16 = _mm256_srli_epi16(_mm256_cmpeq_epi16(bv_32_epi8, bv_32_epi8), 15);
accumulate_1blk_dot(av00_32_epi8, bv_32_epi8, combined_scale00, one_16_epi16, acc0);
accumulate_1blk_dot(av10_32_epi8, bv_32_epi8, combined_scale10, one_16_epi16, acc1);
-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
}
#endif
}
Expand All @@ -208,14 +208,14 @@ accumulate_blklen32_r1c1blk1_avx2(
__m256i bv_32_epi8 = _mm256_set_m128i(_mm_srli_epi16(bv_packed0, 4), bv_packed0);
bv_32_epi8 = _mm256_and_si256(_mm256_set1_epi8(0x0F), bv_32_epi8);

-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
if constexpr (vnni) {
accumulate_1blk_dot_vnni(av00_32_epi8, bv_32_epi8, combined_scale00, acc0);
} else {
#endif
__m256i one_16_epi16 = _mm256_srli_epi16(_mm256_cmpeq_epi16(bv_32_epi8, bv_32_epi8), 15);
accumulate_1blk_dot(av00_32_epi8, bv_32_epi8, combined_scale00, one_16_epi16, acc0);
-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
}
#endif
}
Expand Down
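6 changes: 3 additions & 3 deletions (second changed file; its path is missing from this capture, presumably the blklen64 counterpart of the file above)
Original file line number Diff line number Diff line change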
Expand Up @@ -26,7 +26,7 @@ accumulate_blklen64_r2c1blk1_avx2(
__m256i bv0_32_epi8 = _mm256_and_si256(bv_packed, low_mask); // 0, 1,...30, 31
__m256i bv1_32_epi8 = _mm256_srli_epi16(_mm256_sub_epi8(bv_packed, bv0_32_epi8), 4); // 32, 33,...62, 63

-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
if constexpr (vnni) {
__m256i sum_8_epi32 = _mm256_dpbusds_avx_epi32(_mm256_setzero_si256(), bv0_32_epi8, av00_32_epi8);
sum_8_epi32 = _mm256_dpbusds_avx_epi32(sum_8_epi32, bv1_32_epi8, av01_32_epi8);
Expand Down Expand Up @@ -71,7 +71,7 @@ accumulate_blklen64_r2c1blk1_avx2(
__m256 scale_a1_ps = _mm256_broadcast_ss(scale_a1);

acc1 = _mm256_fmadd_ps(sum_ps, _mm256_mul_ps(scale_a1_ps, scale_b_ps), acc1);
-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
}
#endif
}
Expand All @@ -93,7 +93,7 @@ accumulate_blklen64_r1c1blk1_avx2(
__m256i bv0_32_epi8 = _mm256_and_si256(bv_packed, low_mask); // 0, 1,...30, 31
__m256i bv1_32_epi8 = _mm256_srli_epi16(_mm256_sub_epi8(bv_packed, bv0_32_epi8), 4); // 32, 33,...62, 63

-#if !defined(__GNUC__) || (__GNUC__ > 9)
+#if !defined(__GNUC__) || (__GNUC__ > 10)
if constexpr (vnni) {
__m256i sum_8_epi32 = _mm256_dpbusds_avx_epi32(_mm256_setzero_si256(), bv0_32_epi8, av00_32_epi8);
sum_8_epi32 = _mm256_dpbusds_avx_epi32(sum_8_epi32, bv1_32_epi8, av01_32_epi8);
Expand Down

0 comments on commit 85918e9

Please sign in to comment.