Skip to content

Commit

Permalink
#0: Delete dead code from work_split.hpp
Browse files Browse the repository at this point in the history
  • Loading branch information
yan-zaretskiy committed May 29, 2024
1 parent 9ec72ea commit 44751ac
Show file tree
Hide file tree
Showing 25 changed files with 122 additions and 289 deletions.
53 changes: 0 additions & 53 deletions tests/tt_metal/tt_metal/perf_microbenchmark/common/work_split.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,59 +19,6 @@
namespace tt {
namespace tt_metal {

// Splits total_tiles across num_cores using a div_up-sized share per core.
// Cores are filled in index order; the last core that still receives work may
// get a smaller ("clipped") share, and any cores after it receive nothing.
struct TilesSplit {
    int num_cores_;    // number of cores the work is divided over
    int total_tiles_;  // total number of tiles to distribute
    int tpc_;          // unclipped tiles per core

    // NOTE(review): div_up is declared outside this block; presumably it is
    // ceiling division (total_tiles / num_cores rounded up) -- confirm.
    inline TilesSplit(int num_cores, int total_tiles) : num_cores_(num_cores), total_tiles_(total_tiles) {
        tpc_ = div_up(total_tiles_, num_cores_);
    }

    // number of tiles per core for div_up split (before any clipping)
    inline uint32_t get_tpc() const { return tpc_; }

    // Number of tiles actually assigned to core `icore` when cores
    // 0..icore-1 each take tpc_ tiles first:
    //   - tpc_ while a full share is still available,
    //   - the remainder for the core that runs into total_tiles_,
    //   - 0 once every tile has already been handed out.
    // The previous formula subtracted tpc_ * (icore + 1) from total_tiles_,
    // which is negative whenever the clipping branch is taken and wrapped
    // around when converted to uint32_t.
    inline uint32_t get_clipped_tpc(int icore) const {
        const int assigned_before = tpc_ * icore;  // tiles consumed by lower-indexed cores
        if (assigned_before >= total_tiles_) {
            return 0;
        }
        const int remaining = total_tiles_ - assigned_before;
        return static_cast<uint32_t>(remaining < tpc_ ? remaining : tpc_);
    }
};

// Caches the dimensions reported by a device's
// compute_with_storage_grid_size() and offers helpers for addressing that
// grid with a linear core index.
struct CoreGridDesc {
    uint32_t x_, y_;  // grid extent along x and y, as reported by the device

    // Reads the grid size from `dev` and asserts that both extents are non-zero.
    CoreGridDesc(Device *dev) {
        auto grid_size = dev->compute_with_storage_grid_size();
        x_ = grid_size.x;
        y_ = grid_size.y;
        TT_ASSERT(x_ > 0 && y_ > 0);
    }

    // Number of cores in the grid (x_ * y_).
    uint32_t total_cores() const {
        return x_ * y_;
    }

    // Converts a linear core index into a coordinate: the first component is
    // icore % x_, the second is icore / x_. Asserts icore < total_cores().
    CoreCoord wrap_core(int icore) const {
        TT_ASSERT(icore < total_cores());
        const std::size_t linear_index = static_cast<std::size_t>(icore);
        CoreCoord coord = {linear_index % x_, linear_index / x_};
        return coord;
    }

    // Returns the largest core count j, with 1 <= j <= min(total_cores(), num_tiles),
    // such that num_tiles % j == 0; falls back to 1 when the search finds nothing.
    // The tiles must divide evenly because leftovers are not supported at the
    // moment. block_size is only checked by the assertion below.
    // TODO(AP): optimize if needed, the search is linear in the core count.
    int numcores_dividing_numtiles(int num_tiles, int block_size = 1) {
        uint32_t core_limit = total_cores();
        TT_ASSERT(core_limit % block_size == 0 || core_limit == 1);
        // Never consider more cores than there are tiles.
        if (core_limit > num_tiles) {
            core_limit = num_tiles;
        }
        int candidate = core_limit;
        while (candidate >= 1) {
            if (num_tiles % candidate == 0) {
                return candidate;
            }
            --candidate;
        }
        return 1;
    }
};

// Given a number of tiles and number of cores available
// Set the largest number of cores less than the number of tiles
// Returns the number of cores as well as the number of tiles per core
Expand Down
8 changes: 3 additions & 5 deletions tt_eager/tt_dnn/op_library/moreh_adam/moreh_adam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,13 @@ operation::ProgramWithCallbacks moreh_adam_(
// Device Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::Device *device = param.device();

tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord(core_grid.x_, num_cores_y);
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

// auto compute_with_storage_grid_size = device->compute_with_storage_grid_size();
// uint32_t num_cores_x = compute_with_storage_grid_size.x;
// uint32_t num_cores_y = compute_with_storage_grid_size.y;
auto [num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_tiles);
auto [num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_tiles);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
8 changes: 3 additions & 5 deletions tt_eager/tt_dnn/op_library/moreh_adamw/moreh_adamw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,13 @@ operation::ProgramWithCallbacks moreh_adamw_(
// Device Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::Device *device = param.device();

tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord(core_grid.x_, num_cores_y);
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

// auto compute_with_storage_grid_size = device->compute_with_storage_grid_size();
// uint32_t num_cores_x = compute_with_storage_grid_size.x;
// uint32_t num_cores_y = compute_with_storage_grid_size.y;
auto [num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_tiles);
auto [num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_tiles);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,15 @@ operation::ProgramWithCallbacks moreh_clip_grad_norm_step1_impl(
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord = {core_grid.x_, num_cores_y};
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;
const auto
[num_cores_to_be_used,
all_cores,
core_group_1,
core_group_2,
num_inputs_per_core_group_1,
num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_inputs);
num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_inputs);
TT_ASSERT(core_group_2.ranges().empty());
TT_ASSERT(num_inputs_per_core_group_1 == 1);
TT_ASSERT(num_inputs_per_core_group_2 == 0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ operation::ProgramWithCallbacks moreh_clip_grad_norm_step3_impl(
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord = {core_grid.x_, num_cores_y};
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const auto
[num_cores_to_be_used,
all_cores,
core_group_1,
core_group_2,
num_inputs_per_core_group_1,
num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_inputs);
num_inputs_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_inputs);
TT_ASSERT(core_group_2.ranges().empty());
TT_ASSERT(num_inputs_per_core_group_1 == 1);
TT_ASSERT(num_inputs_per_core_group_2 == 0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,8 @@ operation::ProgramWithCallbacks moreh_cumsum_nc(
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord = {core_grid.x_, num_cores_y};
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const uint32_t in0_t = 2; // input
const uint32_t in1_t = 1; // zero
Expand All @@ -71,7 +70,7 @@ operation::ProgramWithCallbacks moreh_cumsum_nc(
core_group_1,
core_group_2,
num_cols_per_core_group_1,
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_tiles_per_chip);
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_tiles_per_chip);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,16 @@ operation::ProgramWithCallbacks moreh_groupnorm_impl(
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord(core_grid.x_, num_cores_y);
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const auto
[num_cores_to_be_used,
all_cores,
core_group_1,
core_group_2,
num_rows_per_core_group_1,
num_rows_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_rows);
num_rows_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_rows);

log_debug(LogTest, fmt::format("num_cores_to_be_used: {}", num_cores_to_be_used).c_str());
log_debug(LogTest, fmt::format("num_rows_per_core_group_1: {}", num_rows_per_core_group_1).c_str());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,16 @@ operation::ProgramWithCallbacks moreh_groupnorm_backward_gamma_beta_grad_impl(
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord(core_grid.x_, num_cores_y);
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const auto
[num_cores_to_be_used,
all_cores,
core_group_1,
core_group_2,
num_channels_per_core_group_1,
num_channels_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_channels);
num_channels_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_channels);

log_debug(LogTest, fmt::format("num_cores_to_be_used: {}", num_cores_to_be_used).c_str());
log_debug(LogTest, fmt::format("num_channels_per_core_group_1: {}", num_channels_per_core_group_1).c_str());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,16 @@ operation::ProgramWithCallbacks moreh_groupnorm_backward_input_grad_impl(
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord(core_grid.x_, num_cores_y);
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const auto
[num_cores_to_be_used,
all_cores,
core_group_1,
core_group_2,
num_rows_per_core_group_1,
num_rows_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_rows);
num_rows_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_rows);

log_debug(LogTest, fmt::format("num_cores_to_be_used: {}", num_cores_to_be_used).c_str());
log_debug(LogTest, fmt::format("num_rows_per_core_group_1: {}", num_rows_per_core_group_1).c_str());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,8 @@ operation::ProgramWithCallbacks moreh_layernorm_impl(
// Core Setup
////////////////////////////////////////////////////////////////////////////
const auto NCHt = N * C * Ht;
tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord = {core_grid.x_, num_cores_y};
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

// core_group_2 works more.
// If number of working cores is 108 and NCHt is 110,
Expand All @@ -140,7 +139,7 @@ operation::ProgramWithCallbacks moreh_layernorm_impl(
core_group_1,
core_group_2,
num_rows_per_core_group_1,
num_rows_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, NCHt);
num_rows_per_core_group_2] = tt_metal::split_work_to_cores(grid, NCHt);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,16 @@ operation::ProgramWithCallbacks moreh_layernorm_backward_gamma_beta_grad_impl(
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord = {core_grid.x_, num_cores_y};
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const auto
[num_cores_to_be_used,
all_cores,
core_group_1,
core_group_2,
num_cols_per_core_group_1,
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, Wt);
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, Wt);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,17 +106,16 @@ operation::ProgramWithCallbacks moreh_layernorm_backward_input_grad_impl(
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord = {core_grid.x_, num_cores_y};
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const auto
[num_cores_to_be_used,
all_cores,
core_group_1,
core_group_2,
num_rows_per_core_group_1,
num_rows_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, NCHt);
num_rows_per_core_group_2] = tt_metal::split_work_to_cores(grid, NCHt);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,16 @@ operation::ProgramWithCallbacks moreh_bias_backward_multi_core_h(const Tensor &o
////////////////////////////////////////////////////////////////////////////
// This should allocate a DRAM buffer on the device
Device *device = output_grad.device();
CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord(core_grid.x_, num_cores_y);
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const auto
[num_cores_to_be_used,
all_cores,
core_group_1,
core_group_2,
num_cols_per_core_group_1,
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, Wt);
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, Wt);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,16 +174,16 @@ operation::ProgramWithCallbacks moreh_matmul_multi_core(
////////////////////////////////////////////////////////////////////////////
// Core Grid Configuration For Workload
////////////////////////////////////////////////////////////////////////////
CoreGridDesc core_grid(device);
const auto num_cores_y {core_grid.y_};
CoreCoord core_grid_coord = {core_grid.x_, num_cores_y};
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const auto
[num_cores,
all_cores,
core_group_1,
core_group_2,
num_output_tiles_per_core_group_1,
num_output_tiles_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_output_tiles);
num_output_tiles_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_output_tiles);

log_debug(LogOp, "{}:{} num_output_tiles: {}", __func__, __LINE__, num_output_tiles);
log_debug(LogOp, "{}:{} num_output_tiles_per_core_group1: {}, 2: {} ", __func__, __LINE__, num_output_tiles_per_core_group_1, num_output_tiles_per_core_group_2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,8 @@ operation::ProgramWithCallbacks moreh_mean_nc(const Tensor &input, const Tensor
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord(core_grid.x_, num_cores_y);
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const uint32_t in0_t = 2; // input
const uint32_t in1_t = 1; // zero
Expand All @@ -71,7 +70,7 @@ operation::ProgramWithCallbacks moreh_mean_nc(const Tensor &input, const Tensor
core_group_1,
core_group_2,
num_cols_per_core_group_1,
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_output_tiles);
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_output_tiles);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,8 @@ operation::ProgramWithCallbacks moreh_mean_backward_program(const Tensor &output
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord(core_grid.x_, num_cores_y);
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const uint32_t in0_t = 2; // input
const uint32_t in1_t = 1; // zero
Expand All @@ -78,7 +77,7 @@ operation::ProgramWithCallbacks moreh_mean_backward_program(const Tensor &output
core_group_1,
core_group_2,
num_cols_per_core_group_1,
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, num_input_grad_tiles);
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, num_input_grad_tiles);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,16 @@ operation::ProgramWithCallbacks moreh_norm_h_impl(const Tensor &input, float p,
////////////////////////////////////////////////////////////////////////////
// Core Setup
////////////////////////////////////////////////////////////////////////////
tt_metal::CoreGridDesc core_grid(device);
const auto num_cores_y = core_grid.y_;
CoreCoord core_grid_coord(core_grid.x_, num_cores_y);
auto grid = device->compute_with_storage_grid_size();
const auto num_cores_y = grid.y;

const auto
[num_cores_to_be_used,
all_cores,
core_group_1,
core_group_2,
num_cols_per_core_group_1,
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(core_grid_coord, N * C * Wt);
num_cols_per_core_group_2] = tt_metal::split_work_to_cores(grid, N * C * Wt);

////////////////////////////////////////////////////////////////////////////
// CircularBuffer Setup
Expand Down
Loading

0 comments on commit 44751ac

Please sign in to comment.