Skip to content

Commit

Permalink
#3908: Fixes for llk lib compile/regressions:
Browse files Browse the repository at this point in the history
	- Add separate llk api files for sfpu negative & mask
	- Move sfpu identity to metal api folder
	- Add llk_io files to erisc core compile
  • Loading branch information
rtawfik01 committed Dec 13, 2023
1 parent 6c091f5 commit 4303b98
Show file tree
Hide file tree
Showing 39 changed files with 270 additions and 2,573 deletions.
36 changes: 0 additions & 36 deletions tt_metal/hw/ckernels/grayskull/common/inc/ckernel_sfpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,20 +244,6 @@ inline void calculate_atan()
}
}


template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void calculate_negative()
{

for (int d = 0; d < ITERATIONS; d++)
{
vFloat val = dst_reg[0];
dst_reg[0] = -val;
dst_reg++;
}
}


template <bool APPROXIMATION_MODE, int ITERATIONS, int RECIPROCAL_ITERATIONS>
inline void calculate_rsqrt()
{
Expand Down Expand Up @@ -888,21 +874,6 @@ inline void calculate_silu()
}
}

template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void calculate_mask()
{
bool exponent_size_8 = true;
for (int d = 0; d < ITERATIONS; d++)
{
vFloat mask = dst_reg[16];
v_if(sfpu_is_fp16_zero(mask, exponent_size_8)) {
dst_reg[0] = 0;
}
v_endif;
dst_reg++;
}
}

template <SfpuType operation, bool APPROXIMATION_MODE, int SfpuType_PARAM = 0, int ITERATIONS = 4>
inline void calculate_sfpu(uint param0 = 0, uint param1 = 0, uint param2 = 0, uint param3 = 0, uint param4 = 0, uint param5 = 0)
{
Expand Down Expand Up @@ -997,13 +968,6 @@ inline void calculate_sfpu(uint param0 = 0, uint param1 = 0, uint param2 = 0, ui
else if constexpr (operation == SfpuType::silu) {
calculate_silu<APPROXIMATION_MODE, ITERATIONS>();
}
else if constexpr (operation == SfpuType::mask) {
calculate_mask<APPROXIMATION_MODE, ITERATIONS>();
}
else if constexpr (operation == SfpuType::negative) {
calculate_negative<APPROXIMATION_MODE, ITERATIONS>();
}

//erf, erfc are dispatched directly.
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,26 +366,4 @@ inline void llk_math_eltwise_unary_sfpu_silu_init() {
llk_math_eltwise_unary_sfpu_init<SfpuType::silu, APPROXIMATE>();
}

//Mask
/**
 * Runs the unary SFPU "mask" operation by forwarding to the generic
 * llk_math_eltwise_unary_sfpu entry point with SfpuType::mask.
 *
 * @tparam APPROXIMATE  Forwarded to the generic entry point.
 * @tparam dst_sync     Forwarded to the generic entry point (defaults to SyncFull).
 * @param dst_index     Forwarded as the first argument.
 * @param vector_mode   Forwarded as the second argument (defaults to Dim::RC).
 */
template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_mask(uint dst_index, int vector_mode = Dim::RC) {
    constexpr SfpuType sfpu_op = SfpuType::mask;
    llk_math_eltwise_unary_sfpu<sfpu_op, APPROXIMATE, dst_sync>(dst_index, vector_mode);
}

template <bool APPROXIMATE>
inline void llk_math_eltwise_unary_sfpu_mask_init() {
llk_math_eltwise_unary_sfpu_init<SfpuType::mask, APPROXIMATE>();
}

// Negative
/**
 * Runs the unary SFPU "negative" operation by forwarding to the generic
 * llk_math_eltwise_unary_sfpu entry point with SfpuType::negative.
 *
 * @tparam APPROXIMATE  Forwarded to the generic entry point.
 * @tparam dst_sync     Forwarded to the generic entry point (defaults to SyncFull).
 * @param dst_index     Forwarded as the first argument.
 * @param vector_mode   Forwarded as the second argument (defaults to Dim::RC).
 */
template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
inline void llk_math_eltwise_unary_sfpu_negative(uint dst_index, int vector_mode = Dim::RC) {
    constexpr SfpuType sfpu_op = SfpuType::negative;
    llk_math_eltwise_unary_sfpu<sfpu_op, APPROXIMATE, dst_sync>(dst_index, vector_mode);
}

template <bool APPROXIMATE>
inline void llk_math_eltwise_unary_sfpu_negative_init() {
llk_math_eltwise_unary_sfpu_init<SfpuType::negative, APPROXIMATE>();
}

}
75 changes: 0 additions & 75 deletions tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_math_binary_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,78 +9,3 @@
// /*************************************************************************
// * LLK ELTWISE BINARY
// *************************************************************************/

// // Version with no operand
// template <
// EltwiseBinaryType eltwise_binary_type,
// BroadcastType src_b_bcast_type,
// int NUM_FIDELITY_PHASES = 0,
// EltwiseBinaryReuseDestType binary_reuse_dest = EltwiseBinaryReuseDestType::NONE>
// inline void llk_math_eltwise_binary_init(const std::uint32_t transpose = 0, const std::uint32_t acc_to_dest = 0) {
// const std::uint32_t num_faces = 4;

// _llk_math_eltwise_binary_init_<eltwise_binary_type, src_b_bcast_type, NUM_FIDELITY_PHASES, binary_reuse_dest>(
// num_faces, transpose, acc_to_dest);
// }

// // Version with operands
// template <
// EltwiseBinaryType eltwise_binary_type,
// BroadcastType src_b_bcast_type,
// int NUM_FIDELITY_PHASES = 0,
// EltwiseBinaryReuseDestType binary_reuse_dest = EltwiseBinaryReuseDestType::NONE>
// inline void llk_math_eltwise_binary_init_with_operands(
// const std::uint32_t operand_A,
// const std::uint32_t operand_B,
// const std::uint32_t transpose = 0,
// const std::uint32_t acc_to_dest = 0) {
// const std::uint32_t operand_id =
// get_operand_id(operand_A); // operand_id is used to extract tile dim data which is the same for both operands
// const std::uint32_t num_faces = get_operand_num_faces(operand_id);

// _llk_math_eltwise_binary_init_<eltwise_binary_type, src_b_bcast_type, NUM_FIDELITY_PHASES, binary_reuse_dest>(
// num_faces, transpose, acc_to_dest);
// }

// template <
// EltwiseBinaryType eltwise_binary_type,
// BroadcastType src_b_bcast_type,
// DstSync Dst = DstSync::SyncFull,
// int NUM_FIDELITY_PHASES = 0,
// EltwiseBinaryReuseDestType binary_reuse_dest = EltwiseBinaryReuseDestType::NONE,
// bool is_fp32_dest_acc_en = false>
// inline void llk_math_eltwise_binary(uint dst_index, const bool clear_fp32_dst_acc = true) {
// const std::uint32_t num_faces = 4;

// _llk_math_eltwise_binary_<
// eltwise_binary_type,
// src_b_bcast_type,
// Dst,
// NUM_FIDELITY_PHASES,
// binary_reuse_dest,
// is_fp32_dest_acc_en>(num_faces, dst_index, clear_fp32_dst_acc);
// }

// template <
// EltwiseBinaryType eltwise_binary_type,
// BroadcastType src_b_bcast_type,
// DstSync Dst = DstSync::SyncFull,
// int NUM_FIDELITY_PHASES = 0,
// EltwiseBinaryReuseDestType binary_reuse_dest = EltwiseBinaryReuseDestType::NONE,
// bool is_fp32_dest_acc_en = false>
// inline void llk_math_eltwise_binary(
// const std::uint32_t operand_A,
// const std::uint32_t operand_B,
// uint dst_index,
// const bool clear_fp32_dst_acc = true) {
// const std::uint32_t operand_id = get_operand_id(operand_A); // both operands must have same number of faces
// const std::uint32_t num_faces = get_operand_num_faces(operand_id);

// _llk_math_eltwise_binary_<
// eltwise_binary_type,
// src_b_bcast_type,
// Dst,
// NUM_FIDELITY_PHASES,
// binary_reuse_dest,
// is_fp32_dest_acc_en>(num_faces, dst_index, clear_fp32_dst_acc);
// }
Original file line number Diff line number Diff line change
Expand Up @@ -9,62 +9,3 @@
// /*************************************************************************
// * LLK ELTWISE BINARY SFPU
// *************************************************************************/

// template <SfpuType sfpu_op, bool APPROXIMATE, DstSync Dst = DstSync::SyncFull>
// inline void llk_math_eltwise_binary_sfpu(
// const uint operand,
// uint dst_index_a,
// uint dst_index_b,
// int vector_mode = (int)Dim::RC,
// uint param0 = 0,
// uint param1 = 0,
// uint param2 = 0,
// uint param3 = 0,
// uint param4 = 0,
// uint param5 = 0) {
// const std::uint32_t operand_id = get_operand_id(0);
// const std::uint32_t num_faces = get_operand_num_faces(operand_id);
// const std::uint32_t face_r_dim = get_operand_face_r_dim(operand_id);

// _llk_math_eltwise_binary_sfpu_<sfpu_op, APPROXIMATE, Dst>(
// face_r_dim, num_faces, dst_index_a, dst_index_b, vector_mode, param0, param1, param2, param3, param4, param5);
// }

// template <SfpuType sfpu_op, bool APPROXIMATE>
// inline void llk_math_eltwise_binary_sfpu_init(
// uint param0 = 0, uint param1 = 0, uint param2 = 0, uint param3 = 0, uint param4 = 0, uint param5 = 0) {
// _llk_math_eltwise_binary_sfpu_init_<sfpu_op, APPROXIMATE>(param0, param1, param2, param3, param4, param5);
// }

// template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
// inline void llk_math_eltwise_binary_sfpu_quant_int32(
// uint dst_index_a, uint dst_index_b, int vector_mode = (int)Dim::RC) {
// llk_math_eltwise_binary_sfpu<SfpuType::quant_int32, APPROXIMATE, dst_sync>(dst_index_a, dst_index_b, vector_mode);
// }

// template <bool APPROXIMATE>
// inline void llk_math_eltwise_binary_sfpu_quant_int32_init(const uint zero_point) {
// llk_math_eltwise_binary_sfpu_init<SfpuType::quant_int32, APPROXIMATE>(zero_point);
// }

// template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
// inline void llk_math_eltwise_binary_sfpu_requant_int32(
// uint dst_index_a, uint dst_index_b, int vector_mode = (int)Dim::RC) {
// llk_math_eltwise_binary_sfpu<SfpuType::requant_int32, APPROXIMATE, dst_sync>(dst_index_a, dst_index_b, vector_mode);
// }

// template <bool APPROXIMATE>
// inline void llk_math_eltwise_binary_sfpu_requant_int32_init(const uint zero_point) {
// llk_math_eltwise_binary_sfpu_init<SfpuType::requant_int32, APPROXIMATE>(zero_point);
// }

// template <bool APPROXIMATE, DstSync dst_sync = DstSync::SyncFull>
// inline void llk_math_eltwise_binary_sfpu_dequant_int32(
// uint dst_index_a, uint dst_index_b, int vector_mode = (int)Dim::RC) {
// llk_math_eltwise_binary_sfpu<SfpuType::dequant_int32, APPROXIMATE, dst_sync>(dst_index_a, dst_index_b, vector_mode);
// }

// template <bool APPROXIMATE>
// inline void llk_math_eltwise_binary_sfpu_dequant_int32_init(const uint zero_point) {
// llk_math_eltwise_binary_sfpu_init<SfpuType::dequant_int32, APPROXIMATE>(zero_point);
// }
89 changes: 0 additions & 89 deletions tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_math_common_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,95 +14,6 @@
#include "llk_operands.h"
#include "llk_param_structs.h"

// // Need to revisit why we even need this
// #define EPS 1.19209e-07 // std::numeric_limits::epsilon() for FP32

// /*************************************************************************
// * LLK MATH COMMON
// *************************************************************************/

// template <DstSync Dst>
// inline void llk_math_wait_for_dest_available() {
// _llk_math_wait_for_dest_available_<Dst>();
// }

// template <DstSync Dst = SyncFull, bool is_fp32_dest_acc_en = false>
// inline void llk_math_dest_section_done() {
// _llk_math_dest_section_done_<Dst, is_fp32_dest_acc_en>();
// }

// template <DstSync Dst, bool is_fp32_dest_acc_en = false>
// inline void llk_math_pack_sync_init() {
// _llk_math_pack_sync_init_<Dst, is_fp32_dest_acc_en>();
// }

// template <bool mail2math = true, bool mail2pack = true>
// inline void llk_math_get_tile(std::uint32_t operand, std::uint32_t tile_index, std::uint32_t *p_tile) {
// _llk_math_get_tile_<mail2math, mail2pack>(tile_index, p_tile);
// }

// template <bool mail2math = true, bool mail2pack = true>
// inline void llk_math_release_tile(std::uint32_t operand) {
// _llk_math_release_tile_<mail2math, mail2pack>();
// }

// inline void llk_math_debug_dump(std::uint8_t *data, std::uint32_t byte_size) { _llk_math_debug_dump_(data, byte_size); }

// inline void llk_math_debug_dump_seek(std::uint8_t offset) { _llk_math_debug_dump_seek_(offset); }

// inline void llk_math_reconfig_data_format_srca(const std::uint32_t srca_new_operand) {
// std::uint32_t new_srca_operand_id = get_operand_id(srca_new_operand);
// _llk_math_reconfig_data_format_srca_(unpack_dst_format[new_srca_operand_id]);
// }

// inline void llk_math_reconfig_data_format_srcb(const std::uint32_t srcb_new_operand) {
// std::uint32_t new_srcb_operand_id = get_operand_id(srcb_new_operand);
// _llk_math_reconfig_data_format_srcb_(unpack_dst_format[new_srcb_operand_id]);
// }

// inline void llk_math_reconfig_data_format(const std::uint32_t srca_new_operand, const std::uint32_t srcb_new_operand) {
// std::uint32_t new_srca_operand_id = get_operand_id(srca_new_operand);
// std::uint32_t new_srcb_operand_id = get_operand_id(srcb_new_operand);

// _llk_math_reconfig_data_format_(unpack_dst_format[new_srca_operand_id], unpack_dst_format[new_srcb_operand_id]);
// }

// inline void llk_math_reconfig_data_format(
// const std::uint32_t srca_old_operand,
// const std::uint32_t srca_new_operand,
// const std::uint32_t srcb_old_operand,
// const std::uint32_t srcb_new_operand) {
// std::uint32_t old_srca_operand_id = get_operand_id(srca_old_operand);
// std::uint32_t new_srca_operand_id = get_operand_id(srca_new_operand);
// std::uint32_t old_srcb_operand_id = get_operand_id(srcb_old_operand);
// std::uint32_t new_srcb_operand_id = get_operand_id(srcb_new_operand);

// if ((unpack_dst_format[old_srca_operand_id] != unpack_dst_format[new_srca_operand_id]) &&
// (unpack_dst_format[old_srcb_operand_id] != unpack_dst_format[new_srcb_operand_id])) {
// llk_math_reconfig_data_format(srca_new_operand, srcb_new_operand);
// } else if ((unpack_dst_format[old_srca_operand_id] != unpack_dst_format[new_srca_operand_id])) {
// llk_math_reconfig_data_format_srca(srca_new_operand);
// } else if ((unpack_dst_format[old_srcb_operand_id] != unpack_dst_format[new_srcb_operand_id])) {
// llk_math_reconfig_data_format_srcb(srcb_new_operand);
// }
// }

// inline void llk_math_reconfig_data_format_srca(
// const std::uint32_t srca_old_operand, const std::uint32_t srca_new_operand) {
// std::uint32_t old_srca_operand_id = get_operand_id(srca_old_operand);
// std::uint32_t new_srca_operand_id = get_operand_id(srca_new_operand);

// if ((unpack_dst_format[old_srca_operand_id] != unpack_dst_format[new_srca_operand_id])) {
// llk_math_reconfig_data_format_srca(srca_new_operand);
// }
// }

// inline void llk_math_reconfig_data_format_srcb(
// const std::uint32_t srcb_old_operand, const std::uint32_t srcb_new_operand) {
// std::uint32_t old_srcb_operand_id = get_operand_id(srcb_old_operand);
// std::uint32_t new_srcb_operand_id = get_operand_id(srcb_new_operand);

// if ((unpack_dst_format[old_srcb_operand_id] != unpack_dst_format[new_srcb_operand_id])) {
// llk_math_reconfig_data_format_srcb(srcb_new_operand);
// }
// }
57 changes: 0 additions & 57 deletions tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_math_matmul_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,60 +9,3 @@
// /*************************************************************************
// * LLK MATMUL
// *************************************************************************/

// template <int NUM_FIDELITY_PHASES, DstTileFaceLayout FaceLayout = DstTileFaceLayout::ColMajor>
// inline void llk_math_matmul_init(
// const std::uint32_t operandA,
// const std::uint32_t operandB,
// const std::uint32_t transpose = 0,
// const std::uint32_t ct_dim = 1,
// const std::uint32_t rt_dim = 1,
// const std::uint32_t kt_dim = 1) {
// const std::uint32_t in0_id = get_operand_id(operandA);
// const std::uint32_t in1_id = get_operand_id(operandB);

// const bool partial_face = get_operand_partial_face(in0_id);

// const std::uint32_t in0_tile_r_dim = get_operand_tile_r_dim(in0_id);
// const std::uint32_t in0_tile_c_dim = get_operand_tile_c_dim(in0_id);
// const std::uint32_t in1_tile_r_dim = get_operand_tile_r_dim(in1_id);
// const std::uint32_t in1_tile_c_dim = get_operand_tile_c_dim(in1_id);

// #ifdef ARCH_GRAYSKULL
// _llk_math_matmul_init_<NUM_FIDELITY_PHASES, FaceLayout>(
// in0_tile_r_dim,
// in0_tile_c_dim,
// in1_tile_r_dim,
// in1_tile_c_dim,
// partial_face,
// transpose,
// ct_dim,
// rt_dim,
// kt_dim);
// #else
// _llk_math_matmul_init_<NUM_FIDELITY_PHASES, DstTileFaceLayout::RowMajor>(
// in0_tile_r_dim,
// in0_tile_c_dim,
// in1_tile_r_dim,
// in1_tile_c_dim,
// partial_face,
// transpose,
// ct_dim,
// rt_dim,
// kt_dim);
// #endif
// }

// template <int NUM_FIDELITY_PHASES, DstTileFaceLayout FaceLayout = DstTileFaceLayout::ColMajor>
// inline void llk_math_matmul(
// uint dst_index,
// const bool transpose = false,
// const std::uint32_t ct_dim = 1,
// const std::uint32_t rt_dim = 1,
// const std::uint32_t kt_dim = 1) {
// #ifdef ARCH_GRAYSKULL
// _llk_math_matmul_<NUM_FIDELITY_PHASES, FaceLayout>(dst_index, transpose, ct_dim, rt_dim, kt_dim);
// #else
// _llk_math_matmul_<NUM_FIDELITY_PHASES, DstTileFaceLayout::RowMajor>(dst_index, transpose, ct_dim, rt_dim, kt_dim);
// #endif
// }
Loading

0 comments on commit 4303b98

Please sign in to comment.