Skip to content

Commit

Permalink
#0: Add empty llk api files for grayskull to fix compile
Browse files Browse the repository at this point in the history
  • Loading branch information
rtawfik01 committed Dec 6, 2023
1 parent 9f7f00d commit 364fb1d
Show file tree
Hide file tree
Showing 42 changed files with 3,402 additions and 446 deletions.
4 changes: 1 addition & 3 deletions tt_metal/hw/ckernels/grayskull/common/inc/chlkc_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,18 @@ using namespace ckernel;


#ifdef UCK_CHLKC_MATH
// #include "chlkc_math_llk_args.h"
#include "chlkc_unpack_data_format.h"
#include "chlkc_math_fidelity.h"
#include "chlkc_math_approx_mode.h"
#include "chlkc_math.cpp"
#endif

#ifdef UCK_CHLKC_PACK
// #include "chlkc_pack_llk_args.h"
#include "chlkc_pack_data_format.h"
#include "chlkc_pack.cpp"
#endif

#ifdef UCK_CHLKC_UNPACK
// #include "chlkc_unpack_llk_args.h"
#include "chlkc_unpack_data_format.h"
#include "chlkc_unpack.cpp"
#endif
Expand Down
18 changes: 0 additions & 18 deletions tt_metal/hw/ckernels/grayskull/common/inc/ckernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ extern uint32_t dest_offset_id;
extern uint32_t dbg_event_index;
extern uint32_t dbg_event_end;

extern uint32_t op_info_offset;

// Internal scope to namespace methods only (C++ does not allow namespace private ownership)
namespace internal {
}
Expand Down Expand Up @@ -281,22 +279,6 @@ inline void debug_dump(uint8_t *data, uint32_t byte_size) {
// TODO(pk) re-implement
}

// Copies the next op-info record into `op_info_struct` and advances the global
// read cursor `op_info_offset`.
//
// A record is `op_info_num_items` 32-bit words read from
// OP_INFO_BASE_ADDR + op_info_offset. After the copy the cursor moves forward by
// one record and wraps back to 0 once it reaches (or passes) OP_INFO_SIZE.
//
// NOTE(review): assumes tt::op_info_t is at least 7 words large — confirm.
inline void llk_get_next_op_info(tt::op_info_t& op_info_struct) {

    static constexpr uint32_t op_info_num_items = 7;
    // Byte stride of one record; derived from the item count instead of a literal 28.
    static constexpr uint32_t op_info_num_bytes = op_info_num_items * sizeof(uint32_t);

    uint32_t* op_info_ptr = reinterpret_cast<uint32_t*>(OP_INFO_BASE_ADDR + op_info_offset);

    volatile tt_l1_ptr uint32_t* op_info_struct_ptr = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(&op_info_struct);
    for (uint32_t i = 0; i < op_info_num_items; i++) {
        op_info_struct_ptr[i] = op_info_ptr[i];
    }
    op_info_offset += op_info_num_bytes;

    // Use >= rather than ==: if OP_INFO_SIZE is not an exact multiple of the record
    // size, an equality test would never fire and the cursor would run out of bounds.
    if (op_info_offset >= OP_INFO_SIZE) {
        op_info_offset = 0;
    }
}

inline __attribute__((always_inline)) unsigned int mulsi3 (unsigned int a, unsigned int b)
{
unsigned int r = 0;
Expand Down
59 changes: 0 additions & 59 deletions tt_metal/hw/ckernels/grayskull/common/inc/ckernel_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,71 +7,12 @@
#include <cstdint>
#include "ckernel_structs.h"
#include "risc_attribs.h"
#include "tensix_functions.h"
#include "hostdevcommon/common_runtime_address_map.h"

extern uint32_t cfg_state_id;
extern uint32_t unp_cfg_context;
extern uint32_t gl_alu_format_spec_reg;

extern volatile uint32_t l1_buffer[16];

//extern const int32_t unpack_src_format[24];
//extern const int32_t unpack_dst_format[24];
//extern const int32_t pack_src_format[16];
//extern const int32_t pack_dst_format[16];

extern uint32_t pack_sync_tile_dst_ptr;
extern uint32_t math_sync_tile_dst_index;

extern CBInterface cb_interface[NUM_CIRCULAR_BUFFERS];

// Section-boundary symbols consumed by firmware_kernel_common_init()
// (presumably provided by the linker script — confirm).
// [__ldm_bss_start, __ldm_bss_end): range passed to wzerorange() during init.
extern uint32_t __ldm_bss_start[];
extern uint32_t __ldm_bss_end[];
// [__ldm_data_start, __ldm_data_end): destination range of the init-time data copy;
// its length in 32-bit words is derived from the difference of the two addresses.
extern uint32_t __ldm_data_start[];
extern uint32_t __ldm_data_end[];
// [__init_array_start, __init_array_end): table of void() function pointers that
// firmware_kernel_common_init() calls in order.
extern void (* __init_array_start[])();
extern void (* __init_array_end[])();
extern uint32_t __firmware_start[];

extern void kernel_init();
extern void kernel_launch();

// Copies `len` 32-bit words from `l1_addr` to `local_mem_addr`.
//
// local_mem_addr: destination buffer (written at indices 0 .. len-1).
// l1_addr:        source buffer (read at indices 0 .. len-1).
// len:            number of words to copy; when len <= 0 neither loop runs.
//
// The main loop handles six words per iteration and deliberately issues all six
// loads before any of the six stores; the statement order is part of the
// optimization described below, so do not interleave loads and stores.
inline void l1_to_local_mem_copy(uint32_t *local_mem_addr, uint32_t tt_l1_ptr *l1_addr, int32_t len) {
// Cover L1 load latency of 6 cycles for the bulk of the copy
int32_t n = 0;
// Runs while at least six words remain (n + 5 < len).
while (n < len - 5) {
uint32_t v0 = l1_addr[n + 0];
uint32_t v1 = l1_addr[n + 1];
uint32_t v2 = l1_addr[n + 2];
uint32_t v3 = l1_addr[n + 3];
uint32_t v4 = l1_addr[n + 4];
uint32_t v5 = l1_addr[n + 5];
local_mem_addr[n + 0] = v0;
local_mem_addr[n + 1] = v1;
local_mem_addr[n + 2] = v2;
local_mem_addr[n + 3] = v3;
local_mem_addr[n + 4] = v4;
local_mem_addr[n + 5] = v5;
n += 6;
}
// Could optimize this further (eg, loop of 2 or 4), probably not worth it
// Tail: copies the remaining 0..5 words one at a time.
while (n < len) {
local_mem_addr[n] = l1_addr[n];
n++;
}
}

// Start-up work that would normally live in an assembly crt0, done in C++ instead:
//   1. zero the range [__ldm_bss_start, __ldm_bss_end) via wzerorange();
//   2. copy the initialised-data image into [__ldm_data_start, __ldm_data_end);
//   3. call every entry of the [__init_array_start, __init_array_end) table in order.
//
// init_local_l1_base: base address of the data image; the source of the copy is this
//                     base plus the offset of __ldm_data_start from MEM_LOCAL_BASE.
inline void firmware_kernel_common_init(void *init_local_l1_base) {

    // Step 1: clear .bss.
    wzerorange(__ldm_bss_start, __ldm_bss_end);

    // Step 2: locate the data image and copy it word by word.
    const uint32_t data_offset = (uint32_t)__ldm_data_start - MEM_LOCAL_BASE;
    const int32_t data_words = ((uint)__ldm_data_end - (uint)__ldm_data_start) >> 2;
    uint32_t *data_image = (uint32_t *)((uint8_t *)init_local_l1_base + data_offset);
    l1_to_local_mem_copy((uint32_t *)__ldm_data_start, data_image, data_words);

    // Step 3: run the init-array entries front to back.
    void (** init_fn)() = __init_array_start;
    while (init_fn < __init_array_end) {
        (**init_fn)();
        init_fn++;
    }
}
217 changes: 217 additions & 0 deletions tt_metal/hw/ckernels/grayskull/common/inc/ckernel_template.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,4 +237,221 @@ class ckernel_unpack_template
void program_and_run(volatile uint *instrn_buffer, const uint8_t count, const uint32_t zmask = 0); // calls program, then run
};

// Builds a template with a single loop instruction.
//
// outer_loop_len / inner_loop_len: stored as the two loop lengths.
// loop_op: stored as loop op 0 and also as the last instruction of both loops.
// Loop op 1, both end ops and the start op default to TT_OP_NOP.
//
// Declared inline because the definition lives in a header; without it, including
// the header from more than one translation unit produces duplicate definitions.
inline ckernel_template::ckernel_template(uint outer_loop_len, uint inner_loop_len, uint loop_op)
    : m_outer_loop_len(outer_loop_len)
    , m_inner_loop_len(inner_loop_len)
    , m_loop_op0(loop_op)
    , m_loop_op1(TT_OP_NOP)
    , m_end_op0(TT_OP_NOP)
    , m_end_op1(TT_OP_NOP)
    , m_start_op0(TT_OP_NOP)
{
    m_loop0_last_instr = loop_op;
    m_loop1_last_instr = loop_op;
}

// Builds a template with two loop instructions.
//
// outer_loop_len / inner_loop_len: stored as the two loop lengths.
// loop_op0 / loop_op1: stored as loop op 0 and loop op 1; loop_op1 is also used as
//                      the last instruction of both loops.
// Both end ops and the start op default to TT_OP_NOP.
//
// Declared inline because the definition lives in a header; without it, including
// the header from more than one translation unit produces duplicate definitions.
inline ckernel_template::ckernel_template(uint outer_loop_len, uint inner_loop_len, uint loop_op0, uint loop_op1)
    : m_outer_loop_len(outer_loop_len)
    , m_inner_loop_len(inner_loop_len)
    , m_loop_op0(loop_op0)
    , m_loop_op1(loop_op1)
    , m_end_op0(TT_OP_NOP)
    , m_end_op1(TT_OP_NOP)
    , m_start_op0(TT_OP_NOP)
{
    m_loop0_last_instr = loop_op1;
    m_loop1_last_instr = loop_op1;
}

// Replaces loop op 0. The stored last-iteration instructions are not modified.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::set_loop_op0(uint loop_op)
{
    m_loop_op0 = loop_op;
}

// Replaces loop op 1. The stored last-iteration instructions are not modified.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::set_loop_op1(uint loop_op)
{
    m_loop_op1 = loop_op;
}

// Sets both end ops (end_op0 and end_op1) at once.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::set_end_ops(uint end_op0, uint end_op1)
{
    m_end_op0 = end_op0;
    m_end_op1 = end_op1;
}

// Sets end op 0 and resets end op 1 to TT_OP_NOP.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::set_end_op(uint end_op0)
{
    set_end_ops(end_op0, TT_OP_NOP);
}

// Sets the start op.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::set_start_op(uint start_op0)
{
    m_start_op0 = start_op0;
}

// Overrides the last instruction of the inner loop (stored in m_loop1_last_instr).
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::set_last_inner_loop_instr(uint op)
{
    m_loop1_last_instr = op;
}

// Overrides the last instruction of the outer loop (stored in m_loop0_last_instr).
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::set_last_outer_loop_instr(uint op)
{
    m_loop0_last_instr = op;
}

// Convenience wrapper: program() the template, then run() it, with the same buffer.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::program_and_run(volatile uint *instrn_buffer)
{
    program(instrn_buffer);
    run(instrn_buffer);
}

// Issues the MOP instruction that runs the previously programmed double-loop template.
// NOTE(review): instrn_buffer is not referenced directly here; it may be consumed
// by the TTI_MOP macro — confirm before removing or renaming the parameter.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::run(volatile uint *instrn_buffer)
{
    TTI_MOP(1, 0, 0); // run the double-loop template
}

// Writes this template's nine configuration words to the MOP config area at
// TENSIX_MOP_CFG_BASE, after mop_sync() has waited for previous mops to complete.
//
// Word layout written here:
//   [0] outer loop length   [1] inner loop length   [2] start op
//   [3] end op 0            [4] end op 1            [5] loop op 0
//   [6] loop op 1           [7] outer-loop last instr   [8] inner-loop last instr
//
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_template::program(volatile uint *instrn_buffer)
{
    volatile uint *mop_cfg = reinterpret_cast<volatile uint *>(TENSIX_MOP_CFG_BASE);

    mop_sync(); // wait until previous mops have completed

    mop_cfg[0] = m_outer_loop_len;
    mop_cfg[1] = m_inner_loop_len;
    mop_cfg[2] = m_start_op0;
    mop_cfg[3] = m_end_op0;
    mop_cfg[4] = m_end_op1;
    mop_cfg[5] = m_loop_op0;
    mop_cfg[6] = m_loop_op1;
    mop_cfg[7] = m_loop0_last_instr;
    mop_cfg[8] = m_loop1_last_instr;
}

// Convenience wrapper: program() the unpack template, then run() it with the given
// iteration count and zmask.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_unpack_template::program_and_run(volatile uint *instrn_buffer, const uint8_t count, const uint32_t zmask)
{
    program(instrn_buffer);
    run(instrn_buffer, count, zmask);
}

// Runs the programmed unpack template `count` times with a 32-bit zmask.
//
// count: number of iterations; asserted to be <= 128. The MOP instruction receives
//        count - 1.
//        NOTE(review): count == 0 would pass -1 after integer promotion; callers
//        presumably always pass count >= 1 — confirm.
// zmask: upper 16 bits go to TT_MOP_CFG, lower 16 bits go to TT_MOP.
//
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_unpack_template::run(volatile uint *instrn_buffer, const uint8_t count, const uint32_t zmask)
{
    FWASSERT("Unpack template only supports loops up to 128", count <= 128);
    TT_MOP_CFG(zmask >> 16); // Set the top 16 bits of zmask - we could skip this for count <= 16
    TT_MOP(0, count - 1, zmask & 0xFFFF); // Run the template
}

// Version without zmask, should be slightly faster by eliminating one instruction
// (no TT_MOP_CFG is issued; the MOP is run with a zero mask argument).
//
// count: number of iterations; asserted to be <= 128. The MOP instruction receives
//        count - 1.
//
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_unpack_template::run(volatile uint *instrn_buffer, const uint8_t count)
{
    FWASSERT("Unpack template only supports loops up to 128", count <= 128);
    TT_MOP(0, count - 1, 0); // Run the template
}

// Writes this unpack template's configuration words to the MOP config area at
// TENSIX_MOP_CFG_BASE, after mop_sync() has waited for previous mops to complete.
//
// Word layout written here (word 0 is not written by this function):
//   [1] flags: bit 0 = m_unpackB, bit 1 = m_unpack_halo
//   [2] B instr   [3] A0 instr   [4] A1 instr   [5] A2 instr   [6] A3 instr
//   [7] skip-A instr   [8] skip-B instr
//
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline void ckernel_unpack_template::program(volatile uint *instrn_buffer) const
{
    volatile uint *mop_cfg = reinterpret_cast<volatile uint *>(TENSIX_MOP_CFG_BASE);

    mop_sync(); // wait until previous mops have completed

    mop_cfg[1] = m_unpackB | (m_unpack_halo << 1);
    mop_cfg[2] = m_B_instr;
    mop_cfg[3] = m_A0_instr;
    mop_cfg[4] = m_A1_instr;
    mop_cfg[5] = m_A2_instr;
    mop_cfg[6] = m_A3_instr;
    mop_cfg[7] = m_skipA_instr;
    mop_cfg[8] = m_skipB_instr;
}

// Factory: template with the "src B" and "halo" flags both false, built from a
// single A instruction and its skip instruction; all other instruction slots are 0.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::lA(uint A_instr, uint skipA_instr)
{
    return ckernel_unpack_template(false, // src B
                                   false, // halo
                                   A_instr, 0, 0, 0, skipA_instr, 0, 0);
}

// Factory: template with the "src B" and "halo" flags both false, built from a
// single instruction and its skip instruction; all other instruction slots are 0.
// NOTE(review): B_instr / skipB_instr are passed in the same constructor positions
// that lA() uses for A_instr / skipA_instr, and the "src B" flag is false —
// presumably intentional, confirm against the constructor.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::lB(uint B_instr, uint skipB_instr)
{
    return ckernel_unpack_template(false, // src B
                                   false, // halo
                                   B_instr, 0, 0, 0, skipB_instr, 0, 0);
}

// Factory: halo-mode template ("src B" false, "halo" true) whose first slot is
// DEF_NINFSRCA when `neginf` is true and DEF_ZEROSRCA otherwise, followed by
// A_instr and two DEF_UNPACR_NOP slots; skipA_instr is the skip-A instruction and
// the last two slots are 0.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::lzA(bool neginf, uint A_instr, uint skipA_instr)
{
    return ckernel_unpack_template(false, // src B
                                   true,  // halo
                                   neginf ? DEF_NINFSRCA : DEF_ZEROSRCA, A_instr, DEF_UNPACR_NOP, DEF_UNPACR_NOP, skipA_instr, 0, 0);
}

// Factory: halo-mode template ("src B" false, "halo" true) selected by halo_mask.
//
// For bits 0..2 of halo_mask: a set bit selects DEF_Ax_instr, or DEF_Ax_last_instr
// when that bit is the one chosen by last_mask; a clear bit selects SKIP_Ax_instr.
// Bit 3 selects DEF_A3_instr or SKIP_A3_instr. The skip-A slot is DEF_SKIP_A and
// the last two slots are 0.
//
// last_mask is 0x1 for halo_mask == 1, 0x2 for halo_mask in {0, 2, 3}, 0x4 for
// halo_mask in 4..7, and 0 for halo_mask > 7 (no *_last variant is then selected).
//
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::lhA(const uint32_t halo_mask)
{
    // Figure out which unpack is last
    const uint last_mask = (halo_mask == 0x1) ? 0x1 : (halo_mask <= 0x3) ? 0x2 : (halo_mask <= 0x7) ? 0x4 : 0;

    return ckernel_unpack_template(false, // src B
                                   true,  // halo
                                   ((halo_mask >> 0) & 0x1) ? ((last_mask >> 0) & 0x1) ? DEF_A0_last_instr : DEF_A0_instr : SKIP_A0_instr,
                                   ((halo_mask >> 1) & 0x1) ? ((last_mask >> 1) & 0x1) ? DEF_A1_last_instr : DEF_A1_instr : SKIP_A1_instr,
                                   ((halo_mask >> 2) & 0x1) ? ((last_mask >> 2) & 0x1) ? DEF_A2_last_instr : DEF_A2_instr : SKIP_A2_instr,
                                   ((halo_mask >> 3) & 0x1) ? DEF_A3_instr : SKIP_A3_instr, DEF_SKIP_A, 0, 0);
}

// Factory: same selection scheme as lhA() but using the *_fconv_* instruction
// variants ("src B" false, "halo" true).
//
// For bits 0..2 of halo_mask: a set bit selects DEF_Ax_fconv_instr, or
// DEF_Ax_fconv_last_instr when that bit is the one chosen by last_mask; a clear bit
// selects SKIP_Ax_instr. Bit 3 selects DEF_A3_fconv_instr or SKIP_A3_instr. The
// skip-A slot is TT_OP_NOP and the last two slots are 0.
//
// last_mask is 0x1 for halo_mask == 1, 0x2 for halo_mask in {0, 2, 3}, 0x4 for
// halo_mask in 4..7, and 0 for halo_mask > 7 (no *_last variant is then selected).
//
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::flhA(const uint32_t halo_mask)
{
    // Figure out which unpack is last
    const uint last_mask = (halo_mask == 0x1) ? 0x1 : (halo_mask <= 0x3) ? 0x2 : (halo_mask <= 0x7) ? 0x4 : 0;

    return ckernel_unpack_template(false, // src B
                                   true,  // halo
                                   ((halo_mask >> 0) & 0x1) ? ((last_mask >> 0) & 0x1) ? DEF_A0_fconv_last_instr : DEF_A0_fconv_instr : SKIP_A0_instr,
                                   ((halo_mask >> 1) & 0x1) ? ((last_mask >> 1) & 0x1) ? DEF_A1_fconv_last_instr : DEF_A1_fconv_instr : SKIP_A1_instr,
                                   ((halo_mask >> 2) & 0x1) ? ((last_mask >> 2) & 0x1) ? DEF_A2_fconv_last_instr : DEF_A2_fconv_instr : SKIP_A2_instr,
                                   ((halo_mask >> 3) & 0x1) ? DEF_A3_fconv_instr : SKIP_A3_instr, TT_OP_NOP, 0, 0);
}

// Factory: halo-mode template with source B enabled ("src B" true, "halo" true).
//
// The A slots are chosen from halo_mask exactly as in lhA(): for bits 0..2 a set
// bit selects DEF_Ax_instr (or DEF_Ax_last_instr for the bit chosen by last_mask),
// a clear bit selects SKIP_Ax_instr; bit 3 selects DEF_A3_instr or SKIP_A3_instr.
// The skip-A slot is DEF_SKIP_A. The B slot is DEF_B_rarefy_cntx_ovrd_instr when
// `rarefy` is true and DEF_B_cntx_ovrd_instr otherwise; the skip-B slot is DEF_SKIP_B.
//
// last_mask is 0x1 for halo_mask == 1, 0x2 for halo_mask in {0, 2, 3}, 0x4 for
// halo_mask in 4..7, and 0 for halo_mask > 7 (no *_last variant is then selected).
//
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::lBhA(const uint32_t halo_mask, const bool rarefy)
{
    // Figure out which unpack is last
    const uint last_mask = (halo_mask == 0x1) ? 0x1 : (halo_mask <= 0x3) ? 0x2 : (halo_mask <= 0x7) ? 0x4 : 0;

    return ckernel_unpack_template(true, // src B
                                   true, // halo
                                   ((halo_mask >> 0) & 0x1) ? ((last_mask >> 0) & 0x1) ? DEF_A0_last_instr : DEF_A0_instr : SKIP_A0_instr,
                                   ((halo_mask >> 1) & 0x1) ? ((last_mask >> 1) & 0x1) ? DEF_A1_last_instr : DEF_A1_instr : SKIP_A1_instr,
                                   ((halo_mask >> 2) & 0x1) ? ((last_mask >> 2) & 0x1) ? DEF_A2_last_instr : DEF_A2_instr : SKIP_A2_instr,
                                   ((halo_mask >> 3) & 0x1) ? DEF_A3_instr : SKIP_A3_instr, DEF_SKIP_A, rarefy ? DEF_B_rarefy_cntx_ovrd_instr : DEF_B_cntx_ovrd_instr, DEF_SKIP_B);
}

// Factory: halo-mode template with source B enabled ("src B" true, "halo" true),
// using the *_fconv_* A instruction variants.
//
// The A slots are chosen from halo_mask exactly as in flhA(): for bits 0..2 a set
// bit selects DEF_Ax_fconv_instr (or DEF_Ax_fconv_last_instr for the bit chosen by
// last_mask), a clear bit selects SKIP_Ax_instr; bit 3 selects DEF_A3_fconv_instr
// or SKIP_A3_instr. The skip-A slot is TT_OP_NOP, the B slot is
// DEF_B_cntx_ovrd_no_z_inc_instr and the skip-B slot is DEF_SKIP_B.
//
// last_mask is 0x1 for halo_mask == 1, 0x2 for halo_mask in {0, 2, 3}, 0x4 for
// halo_mask in 4..7, and 0 for halo_mask > 7 (no *_last variant is then selected).
//
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::flBhA(const uint32_t halo_mask)
{
    // Figure out which unpack is last
    const uint last_mask = (halo_mask == 0x1) ? 0x1 : (halo_mask <= 0x3) ? 0x2 : (halo_mask <= 0x7) ? 0x4 : 0;

    return ckernel_unpack_template(true, // src B
                                   true, // halo
                                   ((halo_mask >> 0) & 0x1) ? ((last_mask >> 0) & 0x1) ? DEF_A0_fconv_last_instr : DEF_A0_fconv_instr : SKIP_A0_instr,
                                   ((halo_mask >> 1) & 0x1) ? ((last_mask >> 1) & 0x1) ? DEF_A1_fconv_last_instr : DEF_A1_fconv_instr : SKIP_A1_instr,
                                   ((halo_mask >> 2) & 0x1) ? ((last_mask >> 2) & 0x1) ? DEF_A2_fconv_last_instr : DEF_A2_fconv_instr : SKIP_A2_instr,
                                   ((halo_mask >> 3) & 0x1) ? DEF_A3_fconv_instr : SKIP_A3_instr, TT_OP_NOP, DEF_B_cntx_ovrd_no_z_inc_instr, DEF_SKIP_B);
}

// Factory: non-halo template with source B enabled ("src B" true, "halo" false),
// built from one A instruction with its skip instruction and one B instruction
// with its skip instruction; the remaining A slots are 0.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::lBA(uint A_instr, uint skipA_instr,
                                                            uint B_instr, uint skipB_instr)
{
    return ckernel_unpack_template(true,  // src B
                                   false, // halo
                                   A_instr, 0, 0, 0, skipA_instr, B_instr, skipB_instr);
}

// Factory for a one-instruction loop: forwards (instr0, skip0) to lA().
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::loopx1instr(uint instr0, uint skip0){
    return ckernel_unpack_template::lA(instr0, skip0);
}

// Factory for a two-instruction loop: forwards to lBA() with instr0/skip0 in the
// A positions and instr1/skip1 in the B positions.
// Declared inline: header-defined function (avoids duplicate definitions across TUs).
inline ckernel_unpack_template ckernel_unpack_template::loopx2instr(uint instr0, uint instr1, uint skip0, uint skip1){
    // Note - 2 instr loop so we will hijack B_instr slot for 2nd instruction via lBA.
    return ckernel_unpack_template::lBA(instr0, skip0, instr1, skip1);
}

} // namespace ckernel
10 changes: 0 additions & 10 deletions tt_metal/hw/ckernels/grayskull/common/inc/cpack_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,14 +416,4 @@ namespace ckernel::packer
{
dest_offset_id = 0;
}

// Converts an output value into an id by subtracting OUTPUT_BASE.
inline uint32_t get_output_id(uint32_t output)
{
    const uint32_t output_id = output - OUTPUT_BASE;
    return output_id;
}

// Returns OUTPUT_BASE_ID; usable in constant expressions.
inline constexpr uint32_t get_output_base_id()
{
    return OUTPUT_BASE_ID;
}
}
5 changes: 0 additions & 5 deletions tt_metal/hw/ckernels/grayskull/common/inc/cunpack_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,9 +335,4 @@ namespace ckernel::unpacker
// Clear context ID
//reset_config_context();
}

// Maps an operand to its id. The mapping is the identity: the operand value is
// returned unchanged.
inline uint32_t get_operand_id(uint32_t operand)
{
    const uint32_t operand_id = operand;
    return operand_id;
}
}
Loading

0 comments on commit 364fb1d

Please sign in to comment.