Skip to content

Commit

Permalink
Revert "Ncvetkovic/0 bh llk test coverage dst acc" (#13784)
Browse files Browse the repository at this point in the history
Revert "Ncvetkovic/0 bh llk test coverage dst acc (#13293)"

This reverts commit e58dd71.
  • Loading branch information
TT-billteng authored Oct 14, 2024
1 parent 4f9574d commit 7b95079
Show file tree
Hide file tree
Showing 10 changed files with 764 additions and 948 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include "compute_kernel_api/eltwise_unary/eltwise_unary.h"
#include "compute_kernel_api.h"

#define START_IN_TILE_ID (0)
#define START_DST_TILE_ID (0)
namespace NAMESPACE {
void MAIN {

Expand All @@ -31,9 +33,9 @@ void MAIN {
cb_reserve_back(out_cb_id, num_single_transfer);

// Copy num_single_transfer tiles from in_cb to DEST
copy_block_matmul_partials(in_cb_id, 0, 0, num_single_transfer);
copy_block_matmul_partials(in_cb_id, START_IN_TILE_ID, START_DST_TILE_ID, num_single_transfer);
// Pack num_single_transfer tiles to out_cb
matmul_pack_tile(0, out_cb_id, num_single_transfer);
matmul_pack_tile(START_DST_TILE_ID, out_cb_id, num_single_transfer);

// Release DEST reg marking compute/pack complete
release_dst();
Expand Down
21 changes: 8 additions & 13 deletions tests/tt_metal/tt_metal/test_kernels/compute/reconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@
//
// SPDX-License-Identifier: Apache-2.0

#include <cstdint>

#include "compute_kernel_api/eltwise_binary.h"
#include <cstdint>
#include "compute_kernel_api/eltwise_unary/sfpu_split_includes.h"
#include "compute_kernel_api/tile_move_copy.h"
#include "compute_kernel_api/pack.h"
#include "compute_kernel_api/reconfig_data_format.h"

#define START_IN_TILE_ID (0)
#define START_DST_TILE_ID (0)

namespace NAMESPACE {
void MAIN {
uint32_t num_tiles = get_arg_val<uint32_t>(0);
Expand All @@ -18,7 +20,7 @@ void MAIN {
constexpr auto cb_in0 = tt::CB::c_in0; // Bfp8_b
constexpr auto cb_in1 = tt::CB::c_in1; // Bfp16_b
constexpr auto cb_in2 = tt::CB::c_in2; // Bfp16_b
constexpr auto cb_out0 = tt::CB::c_out0; // Fp32
constexpr auto cb_out0 = tt::CB::c_out0; // Bfp16_b
constexpr auto cb_out1 = tt::CB::c_out1; // Bfp8_b


Expand All @@ -39,17 +41,10 @@ void MAIN {
// data inside CB_0, 2nd one inits it to Bfp16_b
// which is inside CB_2
copy_tile_init();
// This call will test copy_tile_to_dst_init_short as well
copy_tile_to_dst_init_short_with_dt(cb_in0, cb_in2);

cb_wait_front(cb_in2, ublock_size_tiles);
#if (BLOCK_COPY == 1)
for (uint32_t u_cnt = 0; u_cnt < ublock_size_tiles; u_cnt++) {
copy_tile(cb_in2, 0, 0);
}
#elif (BLOCK_COPY == 0)
copy_block_matmul_partials(cb_in2, 0, 0, ublock_size_tiles);
#endif
copy_block_matmul_partials(cb_in2, START_IN_TILE_ID, START_DST_TILE_ID, ublock_size_tiles);
cb_pop_front(cb_in2, ublock_size_tiles);

// -------------------- Addition with acc -----------------------------
Expand Down Expand Up @@ -89,7 +84,7 @@ void MAIN {
pack_reconfig_l1_acc(true);
#endif
// Configured already for CB_16, Bfp16_b
matmul_pack_tile(0, cb_out0, ublock_size_tiles);
matmul_pack_tile(START_DST_TILE_ID, cb_out0, ublock_size_tiles);
// Reconfig for CB_17, Bfp8_b, then pack to CB_17
#if (EXPLICIT_RECONFIG == 1)
// Indices for old_output, new_output
Expand All @@ -101,7 +96,7 @@ void MAIN {
// Not testing for L1 accumulation
pack_reconfig_l1_acc(false);

matmul_pack_tile(0, cb_out1, ublock_size_tiles);
matmul_pack_tile(START_DST_TILE_ID, cb_out1, ublock_size_tiles);
release_dst();

cb_pop_front(cb_in0, ublock_size_tiles);
Expand Down
43 changes: 6 additions & 37 deletions tests/tt_metal/tt_metal/test_kernels/compute/reduce_h.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,22 @@

#include <cstdint>

#include "compute_kernel_api/reduce.h"

/* This dummy initialization function is called prior to reduce_init to ensure proper
* initialization of the HW and to test reduce_init_short/reduce_init_delta calls.
*
* - If SHORT_INIT is defined, this function provides API calls
* which initialize the HW properly when supplemented with reduce_init_short or
* reduce_init_delta (note that these two inits are the same except for the "at_start"
* argument; reference reduce.h for more details).
* - If SHORT_INIT is not defined, only the PACK configuration function is called, with
* the negated value of the defined "at_start" template argument, because the full reduce_init
* provides the other API calls.
*
* If "at_start = 1", the value that is passed to llk_pack_reduce_config_v2 is 0.
* If "at_start = 0", the value that is passed to llk_pack_reduce_config_v2 is 1.
*
* After dummy_init is called, the proper reduce init call will be invoked with the defined
* value of the argument, not the negated value. This will ensure that the "at_start"
* argument is tested. Reference llk_pack_reduce_config_v2 for more details.
*/
template<bool at_start, PoolType reduce_type = REDUCE_OP, ReduceDim reduce_dim = REDUCE_DIM>
ALWI void dummy_init(uint32_t icb = 0, uint32_t icb_scaler = 1, uint32_t ocb = 16)
{
#ifdef SHORT_INIT
UNPACK(( llk_unpack_AB_hw_configure_disaggregated<DST_ACCUM_MODE>(icb, icb_scaler) ));

MATH(( llk_math_pack_sync_init<DST_ACCUM_MODE>() ));
MATH(( llk_math_hw_configure_disaggregated() ));
#include "debug/dprint.h"

PACK(( llk_pack_init() ));
PACK(( llk_pack_dest_init<false, DST_ACCUM_MODE>() ));
#endif
PACK(( llk_pack_reduce_config_v2<reduce_dim, !at_start, false, DST_ACCUM_MODE>(ocb) ));
}
#include "compute_kernel_api/reduce.h"
#include "compute_kernel_api/eltwise_binary.h"

namespace NAMESPACE {
void MAIN {

constexpr uint32_t Ht = get_compile_time_arg_val(0);
constexpr uint32_t Wt = get_compile_time_arg_val(1);
constexpr uint32_t NC = get_compile_time_arg_val(2);
constexpr bool at_start = get_compile_time_arg_val(3);
dummy_init<at_start>(tt::CB::c_in0, tt::CB::c_in2);
#ifndef SHORT_INIT
reduce_init<at_start>(tt::CB::c_in0, tt::CB::c_in2);
reduce_init<true>(tt::CB::c_in0, tt::CB::c_in2);
#else
reduce_init_delta<at_start>(tt::CB::c_out0, tt::CB::c_in0, tt::CB::c_in2);
binary_op_init_common(tt::CB::c_in0, tt::CB::c_in2, tt::CB::c_out0);
reduce_init_delta<false>(tt::CB::c_out0, tt::CB::c_in0, tt::CB::c_in2);
#endif

cb_wait_front(tt::CB::c_in2, 1); // scaler tile from the reader
Expand Down
43 changes: 6 additions & 37 deletions tests/tt_metal/tt_metal/test_kernels/compute/reduce_hw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,22 @@

#include <cstdint>

#include "compute_kernel_api/reduce.h"

/* This dummy initialization function is called prior to reduce_init to ensure proper
* initialization of the HW and to test reduce_init_short/reduce_init_delta calls.
*
* - If SHORT_INIT is defined, this function provides API calls
* which initialize the HW properly when supplemented with reduce_init_short or
* reduce_init_delta (note that these two inits are the same except for the "at_start"
* argument; reference reduce.h for more details).
* - If SHORT_INIT is not defined, only the PACK configuration function is called, with
* the negated value of the defined "at_start" template argument, because the full reduce_init
* provides the other API calls.
*
* If "at_start = 1", the value that is passed to llk_pack_reduce_config_v2 is 0.
* If "at_start = 0", the value that is passed to llk_pack_reduce_config_v2 is 1.
*
* After dummy_init is called, the proper reduce init call will be invoked with the defined
* value of the argument, not the negated value. This will ensure that the "at_start"
* argument is tested. Reference llk_pack_reduce_config_v2 for more details.
*/
template<bool at_start, PoolType reduce_type = REDUCE_OP, ReduceDim reduce_dim = REDUCE_DIM>
ALWI void dummy_init(uint32_t icb = 0, uint32_t icb_scaler = 1, uint32_t ocb = 16)
{
#ifdef SHORT_INIT
UNPACK(( llk_unpack_AB_hw_configure_disaggregated<DST_ACCUM_MODE>(icb, icb_scaler) ));

MATH(( llk_math_pack_sync_init<DST_ACCUM_MODE>() ));
MATH(( llk_math_hw_configure_disaggregated() ));
#include "debug/dprint.h"

PACK(( llk_pack_init() ));
PACK(( llk_pack_dest_init<false, DST_ACCUM_MODE>() ));
#endif
PACK(( llk_pack_reduce_config_v2<reduce_dim, !at_start, false, DST_ACCUM_MODE>(ocb) ));
}
#include "compute_kernel_api/reduce.h"
#include "compute_kernel_api/eltwise_binary.h"

namespace NAMESPACE {
void MAIN {

constexpr uint32_t Ht = get_compile_time_arg_val(0);
constexpr uint32_t Wt = get_compile_time_arg_val(1);
constexpr uint32_t NC = get_compile_time_arg_val(2);
constexpr bool at_start = get_compile_time_arg_val(3);
dummy_init<at_start>(tt::CB::c_in0, tt::CB::c_in2);
#ifndef SHORT_INIT
reduce_init<at_start>(tt::CB::c_in0, tt::CB::c_in2);
reduce_init<true>(tt::CB::c_in0, tt::CB::c_in2);
#else
reduce_init_delta<at_start>(tt::CB::c_out0, tt::CB::c_in0, tt::CB::c_in2);
binary_op_init_common(tt::CB::c_in0, tt::CB::c_in2, tt::CB::c_out0);
reduce_init_delta<false>(tt::CB::c_out0, tt::CB::c_in0, tt::CB::c_in2);
#endif

cb_wait_front(tt::CB::c_in2, 1); // scaler tile from the reader
Expand Down
43 changes: 6 additions & 37 deletions tests/tt_metal/tt_metal/test_kernels/compute/reduce_w.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,22 @@

#include <cstdint>

#include "compute_kernel_api/reduce.h"

/* This dummy initialization function is called prior to reduce_init to ensure proper
* initialization of the HW and to test reduce_init_short/reduce_init_delta calls.
*
* - If SHORT_INIT is defined, this function provides API calls
* which initialize the HW properly when supplemented with reduce_init_short or
* reduce_init_delta (note that these two inits are the same except for the "at_start"
* argument; reference reduce.h for more details).
* - If SHORT_INIT is not defined, only the PACK configuration function is called, with
* the negated value of the defined "at_start" template argument, because the full reduce_init
* provides the other API calls.
*
* If "at_start = 1", the value that is passed to llk_pack_reduce_config_v2 is 0.
* If "at_start = 0", the value that is passed to llk_pack_reduce_config_v2 is 1.
*
* After dummy_init is called, the proper reduce init call will be invoked with the defined
* value of the argument, not the negated value. This will ensure that the "at_start"
* argument is tested. Reference llk_pack_reduce_config_v2 for more details.
*/
template<bool at_start, PoolType reduce_type = REDUCE_OP, ReduceDim reduce_dim = REDUCE_DIM>
ALWI void dummy_init(uint32_t icb = 0, uint32_t icb_scaler = 1, uint32_t ocb = 16)
{
#ifdef SHORT_INIT
UNPACK(( llk_unpack_AB_hw_configure_disaggregated<DST_ACCUM_MODE>(icb, icb_scaler) ));

MATH(( llk_math_pack_sync_init<DST_ACCUM_MODE>() ));
MATH(( llk_math_hw_configure_disaggregated() ));
#include "debug/dprint.h"

PACK(( llk_pack_init() ));
PACK(( llk_pack_dest_init<false, DST_ACCUM_MODE>() ));
#endif
PACK(( llk_pack_reduce_config_v2<reduce_dim, !at_start, false, DST_ACCUM_MODE>(ocb) ));
}
#include "compute_kernel_api/reduce.h"
#include "compute_kernel_api/eltwise_binary.h"

namespace NAMESPACE {
void MAIN {

constexpr uint32_t Ht = get_compile_time_arg_val(0);
constexpr uint32_t Wt = get_compile_time_arg_val(1);
constexpr uint32_t NC = get_compile_time_arg_val(2);
constexpr bool at_start = get_compile_time_arg_val(3);
dummy_init<at_start>(tt::CB::c_in0, tt::CB::c_in2);
#ifndef SHORT_INIT
reduce_init<at_start>(tt::CB::c_in0, tt::CB::c_in2);
reduce_init<true>(tt::CB::c_in0, tt::CB::c_in2);
#else
reduce_init_delta<at_start>(tt::CB::c_out0, tt::CB::c_in0, tt::CB::c_in2);
binary_op_init_common(tt::CB::c_in0, tt::CB::c_in2, tt::CB::c_out0);
reduce_init_delta<false>(tt::CB::c_out0, tt::CB::c_in0, tt::CB::c_in2);
#endif

cb_wait_front(tt::CB::c_in2, 1); // scaler tile from the reader
Expand Down
Loading

0 comments on commit 7b95079

Please sign in to comment.