Skip to content

Commit

Permalink
#3908: Uplift the latest changes from core llk lib. Common files are …
Browse files Browse the repository at this point in the history
…now identical
  • Loading branch information
acejkov authored and rtawfik01 committed Dec 13, 2023
1 parent 1371942 commit 80c6e43
Show file tree
Hide file tree
Showing 21 changed files with 69 additions and 51 deletions.
4 changes: 2 additions & 2 deletions tt_metal/hw/ckernels/wormhole_b0/common/inc/ckernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#define OVERLAY_DECOUPLE 0
#endif

#ifdef LLK_TB_TEST
#if defined(EN_KERNEL_SLOWDOWN)
#include "kernel_slowdown_config.h"
#endif

Expand All @@ -61,10 +61,10 @@
#include "ckernel_include.h"
#include "tensix.h"
#include "fw_debug.h"
#include "tt_log.h"
// #include <cstring>
#if defined(PERF_DUMP) || DELAY_EN > 0
#include <l1_address_map.h>
#include "tt_log.h"
#include "perf_lib/scratch_api.h"
#endif

Expand Down
20 changes: 20 additions & 0 deletions tt_metal/hw/ckernels/wormhole_b0/common/inc/ckernel_sfpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -1319,5 +1319,25 @@ inline void _dequant_int32_(const int iterations, const uint dst_offset)
}
}

// Element-wise int32 addition on the SFPU: for each loop iteration, loads one
// operand from the current dest position and a second operand from an address
// derived from dst_offset, adds them with SFPIADD, and stores the result back
// to the current dest position before advancing dst_reg.
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced in this body.
//   ITERATIONS         - compile-time loop trip count (this is the bound the loop uses).
// Parameters:
//   iterations - not referenced in this body; the loop is bounded by the template
//                parameter ITERATIONS instead.
//                NOTE(review): confirm whether callers expect the runtime value to be honored.
//   dst_offset - scaled by 64 to form the load address of operand B.
//                NOTE(review): presumably an offset in tiles, with 64 being the per-tile
//                stride in dest rows — confirm.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _add_int32_(const int iterations, const uint dst_offset) {
// Operand A is input1 (int32)
// Operand B is input2 (int32)
// Output is int32
#pragma GCC unroll 8
for (int d = 0; d < ITERATIONS; d++) {
// operand A - int32, loaded from offset 0 (the current dest position)
TTI_SFPLOAD(0, 12, 3, 0);
// operand B - int32, loaded from dst_offset * 64
// NOTE(review): presumably the non-immediate TT_ form is used here because the
// address depends on the runtime dst_offset value — confirm.
TT_SFPLOAD(1, 12, 3, dst_offset * 64);
// Integer add of the two loaded operands; per the store comment below, the result
// ends up in LREG_0.
// NOTE(review): the meaning of the trailing mode argument (4) is not visible here — confirm against the ISA docs.
TTI_SFPIADD(0, 1, 0, 4);
// MAD has a 2-cycle pipeline latency so we need one cycle latency until next instr can consume the result
// NOTE(review): the instruction above is SFPIADD, not MAD; this comment looks carried over
// from a MAD-based kernel — confirm whether the NOP is actually required after SFPIADD.
TTI_NOP;
// LREG_0 -> dest as int32
TTI_SFPSTORE(0, 12, 3, 0);
// Advance the dest register pointer so the next iteration processes the next position.
dst_reg++;
}
}

} // namespace sfpu
} // namespace ckernel
4 changes: 1 addition & 3 deletions tt_metal/hw/ckernels/wormhole_b0/llk_lib/llk_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@

namespace ckernel {

enum Dim {
enum VectorMode {
None = 0,
R = 1,
C = 2,
Z = 3,
RC = 4,
ZR = 5,
Invalid = 0xFF,
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ inline void llk_math_eltwise_binary_sfpu(
const uint operand,
uint dst_index_a,
uint dst_index_b,
int vector_mode = (int)Dim::RC,
int vector_mode = (int)VectorMode::RC,
uint param0 = 0,
uint param1 = 0,
uint param2 = 0,
Expand All @@ -38,7 +38,7 @@ inline void llk_math_eltwise_binary_sfpu_init(

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_binary_sfpu_quant_int32(
uint dst_index_a, uint dst_index_b, int vector_mode = (int)Dim::RC) {
uint dst_index_a, uint dst_index_b, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_binary_sfpu<SfpuType::quant_int32, APPROXIMATE, dst_sync>(dst_index_a, dst_index_b, vector_mode);
}

Expand All @@ -49,7 +49,7 @@ inline void llk_math_eltwise_binary_sfpu_quant_int32_init(const uint zero_point)

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_binary_sfpu_requant_int32(
uint dst_index_a, uint dst_index_b, int vector_mode = (int)Dim::RC) {
uint dst_index_a, uint dst_index_b, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_binary_sfpu<SfpuType::requant_int32, APPROXIMATE, dst_sync>(dst_index_a, dst_index_b, vector_mode);
}

Expand All @@ -60,7 +60,7 @@ inline void llk_math_eltwise_binary_sfpu_requant_int32_init(const uint zero_poin

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_binary_sfpu_dequant_int32(
uint dst_index_a, uint dst_index_b, int vector_mode = (int)Dim::RC) {
uint dst_index_a, uint dst_index_b, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_binary_sfpu<SfpuType::dequant_int32, APPROXIMATE, dst_sync>(dst_index_a, dst_index_b, vector_mode);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ inline void llk_math_eltwise_unary_sfpu_rsqrt_init() {
}

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_log(uint dst_index, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_log(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu<SfpuType::log, APPROXIMATE, dst_sync>(dst_index, vector_mode);
}

Expand All @@ -45,7 +45,7 @@ inline void llk_math_eltwise_unary_sfpu_log_with_base_init() {
}

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_tanh(uint dst_index, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_tanh(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu<SfpuType::tanh, APPROXIMATE, dst_sync>(dst_index, vector_mode);
}

Expand Down Expand Up @@ -88,7 +88,7 @@ inline void llk_math_eltwise_unary_sfpu_dropout_init(uint seed = 0) {
}

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_sigmoid(uint dst_index, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_sigmoid(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu<SfpuType::sigmoid, APPROXIMATE, dst_sync>(dst_index, vector_mode);
}

Expand Down Expand Up @@ -164,7 +164,7 @@ inline void llk_math_eltwise_unary_sfpu_gez_init() {
}

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_max(uint dst_index, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_max(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu<SfpuType::max, APPROXIMATE, dst_sync>(dst_index, vector_mode);
}

Expand All @@ -174,7 +174,7 @@ inline void llk_math_eltwise_unary_sfpu_max_init() {
}

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_square(uint dst_index, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_square(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu<SfpuType::square, APPROXIMATE, dst_sync>(dst_index, vector_mode);
}

Expand All @@ -184,7 +184,7 @@ inline void llk_math_eltwise_unary_sfpu_square_init() {
}

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_power(uint dst_index, int pow = 0, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_power(uint dst_index, int pow = 0, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu<SfpuType::power, APPROXIMATE, dst_sync>(dst_index, vector_mode, pow);
}

Expand All @@ -194,7 +194,7 @@ inline void llk_math_eltwise_unary_sfpu_power_init() {
}

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_abs(uint dst_index, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_abs(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu<SfpuType::abs, APPROXIMATE, dst_sync>(dst_index, vector_mode);
}

Expand All @@ -204,7 +204,7 @@ inline void llk_math_eltwise_unary_sfpu_abs_init() {
}

template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_cast_fp32_to_fp16a(uint dst_index, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_cast_fp32_to_fp16a(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu<SfpuType::cast_fp32_to_fp16a, APPROXIMATE, dst_sync>(dst_index, vector_mode);
}

Expand All @@ -226,7 +226,7 @@ inline void llk_math_eltwise_unary_sfpu_exp2_init() {

//heaviside
template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_heaviside(uint dst_index,uint param0, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_heaviside(uint dst_index,uint param0, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu<SfpuType::heaviside, APPROXIMATE, dst_sync>(dst_index,vector_mode,param0);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ inline void llk_math_eltwise_unary_sfpu_0_param(
void (*first_func)(),
void (*func)(),
uint dst_index,
int vector_mode = Dim::RC) {
int vector_mode = (int)VectorMode::RC) {
if constexpr ((Dst == DstSync::SyncTile16) || (Dst == DstSync::SyncTile2)) {
math::set_dst_write_addr<DstTileLayout::Default, DstTileShape::Tile32x32>(math_sync_tile_dst_index);
} else {
math::set_dst_write_addr<DstTileLayout::Default, DstTileShape::Tile32x32>(dst_index);
}
math::set_addr_mod_base();
TTI_STALLWAIT(p_stall::STALL_SFPU, p_stall::MATH);
if (vector_mode == Dim::R) {
if (vector_mode == (int)VectorMode::R) {
// Do a row vector, Face0 + Face1 -- first iteration (first row)
const int ITERATIONS = 1;
#pragma GCC unroll 0
Expand All @@ -33,7 +33,7 @@ inline void llk_math_eltwise_unary_sfpu_0_param(
TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D);
TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D);
TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D);
} else if (vector_mode == Dim::C) {
} else if (vector_mode == (int)VectorMode::C) {
// Do a column vector, Face0 + Face2 -- All iterations for full face
#pragma GCC unroll 0
for (int face = 0; face < 2; face++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ inline void llk_math_eltwise_unary_sfpu_1_param(
void (*first_func)(uint),
void (*func)(uint),
uint dst_index,
int vector_mode = Dim::RC,
int vector_mode = (int)VectorMode::RC,
uint param0 = 0) {
if constexpr ((Dst == DstSync::SyncTile16) || (Dst == DstSync::SyncTile2)) {
math::set_dst_write_addr<DstTileLayout::Default, DstTileShape::Tile32x32>(math_sync_tile_dst_index);
Expand All @@ -20,7 +20,7 @@ inline void llk_math_eltwise_unary_sfpu_1_param(
}
math::set_addr_mod_base();
TTI_STALLWAIT(p_stall::STALL_SFPU, p_stall::MATH);
if (vector_mode == Dim::R) {
if (vector_mode == (int)VectorMode::R) {
// Do a row vector, Face0 + Face1 -- first iteration (first row)
const int ITERATIONS = 1;
#pragma GCC unroll 0
Expand All @@ -34,7 +34,7 @@ inline void llk_math_eltwise_unary_sfpu_1_param(
TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D);
TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D);
TTI_SETRWC(p_setrwc::CLR_NONE, p_setrwc::CR_D, 8, 0, 0, p_setrwc::SET_D);
} else if (vector_mode == Dim::C) {
} else if (vector_mode == (int)VectorMode::C) {
// Do a column vector, Face0 + Face2 -- All iterations for full face
#pragma GCC unroll 0
for (int face = 0; face < 2; face++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ inline void llk_math_calculate_sfpu(
template <SfpuType sfpu_op, bool APPROXIMATE, DstSync Dst = DstSync::SyncFull, bool IS_INT_SFPU_EN = false>
inline void llk_math_eltwise_unary_sfpu(
uint dst_index,
int vector_mode = (int)Dim::RC,
int vector_mode = (int)VectorMode::RC,
uint param0 = 0,
uint param1 = 0,
uint param2 = 0,
Expand All @@ -126,7 +126,7 @@ inline void llk_math_eltwise_unary_sfpu(

_llk_math_eltwise_unary_sfpu_start_<Dst>(dst_index);

if (vector_mode == (int)Dim::R) {
if (vector_mode == (int)VectorMode::R) {
// Do a row vector, Face0 + Face1 -- first iteration (first row)
const int iterations = (num_faces < 4) ? ((face_r_dim <= 2) ? 2 : face_r_dim / 2)
: 2; // At least 2 iterations for odd and even columns
Expand All @@ -140,7 +140,7 @@ inline void llk_math_eltwise_unary_sfpu(
// Skip next two faces
_llk_math_eltwise_unary_sfpu_inc_dst_face_addr_();
_llk_math_eltwise_unary_sfpu_inc_dst_face_addr_();
} else if (vector_mode == (int)Dim::C) {
} else if (vector_mode == (int)VectorMode::C) {
// Do a column vector, Face0 + Face2 if tile is 32x32 or Face0+Face1 if tiles is 32x16 -- All iterations for
// full face
#pragma GCC unroll 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ inline void llk_math_eltwise_unary_sfpu_elu(uint dst_index, uint param0) {
llk_math_eltwise_unary_sfpu_1_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_elu<APPROXIMATE>,
ckernel::sfpu::calculate_elu<APPROXIMATE>,
dst_index, Dim::RC, param0);
dst_index, (int)VectorMode::RC, param0);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ inline void llk_math_eltwise_unary_sfpu_erf(uint dst_index, int param0 = 0) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_sfpu_erf_erfc<SfpuType::erf, APPROXIMATE>,
ckernel::sfpu::calculate_sfpu_erf_erfc<SfpuType::erf, APPROXIMATE>,
dst_index, Dim::RC);
dst_index, (int)VectorMode::RC);
}

template <bool APPROXIMATE, DstSync Dst = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_erfc(uint dst_index, int param0 = 0) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_sfpu_erf_erfc<SfpuType::erfc, APPROXIMATE>,
ckernel::sfpu::calculate_sfpu_erf_erfc<SfpuType::erfc, APPROXIMATE>,
dst_index, Dim::RC);
dst_index, (int)VectorMode::RC);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ inline void llk_math_eltwise_unary_sfpu_erfinv_op(uint dst_index) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_erfinv<APPROXIMATE>,
ckernel::sfpu::calculate_erfinv<APPROXIMATE>,
dst_index, Dim::RC);
dst_index, (int)VectorMode::RC);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace ckernel {
// New LLK SFPU APIs

template <bool APPROXIMATE, DstSync Dst = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_exponential(uint dst_index, int vector_mode = Dim::RC, int param0 = 0) {
inline void llk_math_eltwise_unary_sfpu_exponential(uint dst_index, int vector_mode = (int)VectorMode::RC, int param0 = 0) {

constexpr bool zero_negative = true;
constexpr int first_iterations = 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace ckernel {
// New LLK SFPU APIs

template <bool APPROXIMATE, DstSync Dst = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_gelu(uint dst_index, int vector_mode = Dim::RC, int param0=0) {
inline void llk_math_eltwise_unary_sfpu_gelu(uint dst_index, int vector_mode = (int)VectorMode::RC, int param0=0) {
constexpr int first_iterations = 1;
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_gelu<APPROXIMATE, first_iterations>,
Expand All @@ -27,7 +27,7 @@ inline void llk_math_eltwise_unary_sfpu_gelu_init() {
}

template <bool APPROXIMATE, DstSync Dst = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_gelu_derivative(uint dst_index, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_gelu_derivative(uint dst_index, int vector_mode = (int)VectorMode::RC) {
constexpr int first_iterations = 1;
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_gelu_derivative<APPROXIMATE, first_iterations>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ inline void llk_math_eltwise_unary_sfpu_i0_op(uint dst_index) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_i0<APPROXIMATE>,
ckernel::sfpu::calculate_i0<APPROXIMATE>,
dst_index, Dim::RC);
dst_index, (int)VectorMode::RC);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ inline void llk_math_eltwise_unary_sfpu_isinf(uint dst_index) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isinf, APPROXIMATE>,
ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isinf, APPROXIMATE>,
dst_index, Dim::RC);
dst_index, (int)VectorMode::RC);

}

Expand All @@ -41,7 +41,7 @@ inline void llk_math_eltwise_unary_sfpu_isposinf(uint dst_index) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isposinf, APPROXIMATE>,
ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isposinf, APPROXIMATE>,
dst_index,Dim::RC);
dst_index,(int)VectorMode::RC);

}

Expand All @@ -58,7 +58,7 @@ inline void llk_math_eltwise_unary_sfpu_isneginf(uint dst_index) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isneginf, APPROXIMATE>,
ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isneginf, APPROXIMATE>,
dst_index,Dim::RC);
dst_index,(int)VectorMode::RC);

}

Expand All @@ -73,7 +73,7 @@ inline void llk_math_eltwise_unary_sfpu_isnan(uint dst_index) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isnan, APPROXIMATE>,
ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isnan, APPROXIMATE>,
dst_index,Dim::RC);
dst_index,(int)VectorMode::RC);

}

Expand All @@ -88,7 +88,7 @@ inline void llk_math_eltwise_unary_sfpu_isfinite(uint dst_index) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isfinite, APPROXIMATE>,
ckernel::sfpu::calculate_sfpu_isinf_isnan<SfpuType::isfinite, APPROXIMATE>,
dst_index,Dim::RC);
dst_index,(int)VectorMode::RC);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ inline void llk_math_eltwise_unary_sfpu_logical_not_unary_op(uint dst_index) {
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_logical_not_unary<APPROXIMATE>,
ckernel::sfpu::calculate_logical_not_unary<APPROXIMATE>,
dst_index, Dim::RC);
dst_index, (int)VectorMode::RC);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace ckernel {
// New LLK SFPU APIs

template <bool APPROXIMATE, DstSync Dst = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_reciprocal(uint dst_index, int vector_mode = Dim::RC) {
inline void llk_math_eltwise_unary_sfpu_reciprocal(uint dst_index, int vector_mode = (int)VectorMode::RC) {
constexpr int first_iterations = 1;
llk_math_eltwise_unary_sfpu_0_param<APPROXIMATE, Dst>
(ckernel::sfpu::calculate_reciprocal<APPROXIMATE, first_iterations>,
Expand Down
Loading

0 comments on commit 80c6e43

Please sign in to comment.