Skip to content

Commit

Permalink
#14119: Make CoreRangeSet accept/store CoreRanges as a vector interna…
Browse files Browse the repository at this point in the history
…lly to support user specified ordering

Split core_coord.h into implementation and header file
  • Loading branch information
tt-aho committed Oct 22, 2024
1 parent b6e8600 commit 1478321
Show file tree
Hide file tree
Showing 51 changed files with 815 additions and 651 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@ int main(int argc, char **argv) {
uint32_t num_cores = num_banks; // number of DRAM banks
// uint32_t num_banks_all = 12;

CoreRangeSet all_cores = CoreRangeSet{{}};
CoreRangeSet all_cores;
std::vector<CoreCoord> all_cores_list;
if (device->arch() == tt::ARCH::WORMHOLE_B0) {
get_dram_reader_core_coords_wormhole_b0(device, all_cores, all_cores_list);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -815,9 +815,9 @@ int main(int argc, char **argv) {
uint32_t num_tiles = static_cast<uint32_t>((input_size + single_tile_size - 1) / single_tile_size);
uint32_t num_cores = num_banks; // number of DRAM banks

CoreRangeSet all_dram_reader_cores = CoreRangeSet{{}};
CoreRangeSet all_dram_reader_cores;
std::vector<CoreCoord> all_dram_reader_cores_ordered;
CoreRangeSet all_l1_receiver_cores = CoreRangeSet{{}};
CoreRangeSet all_l1_receiver_cores;
std::vector<CoreCoord> all_l1_writer_cores_ordered;
if (device->arch() == tt::ARCH::BLACKHOLE) {
get_dram_reader_core_coords_blackhole(device, all_dram_reader_cores, all_dram_reader_cores_ordered);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ int main(int argc, char **argv) {
{"MCAST_NOC_END_ADDR_Y", std::to_string(mcast_noc_addr_end_y)}
};
if (!page_size_as_runtime_arg_g) {
defines.insert(pair<string, string>("PAGE_SIZE", std::to_string(page_size_g)));
defines.insert(std::pair<string, string>("PAGE_SIZE", std::to_string(page_size_g)));
}

tt_metal::CircularBufferConfig cb_config = tt_metal::CircularBufferConfig(page_size_g * page_count_g, {{0, tt::DataFormat::Float32}})
Expand Down
2 changes: 1 addition & 1 deletion tests/tt_metal/tt_metal/test_core_range_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ int main(int argc, char **argv) {
tt_metal::Program program = tt_metal::CreateProgram();
CoreRange core_range_one({0, 0}, {1, 1});
CoreRange core_range_two({2, 2}, {3, 3});
CoreRangeSet core_ranges = CoreRangeSet({core_range_one, core_range_two});
CoreRangeSet core_ranges = CoreRangeSet(std::vector{core_range_one, core_range_two});

pass &= test_program_specified_with_core_range_set(device, program, core_ranges);

Expand Down
2 changes: 1 addition & 1 deletion tests/tt_metal/tt_metal/test_multi_core_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ bool test_multi_core_kernel_unique_runtime_args(tt_metal::Device *device) {
CoreRange core_group({0, 1}, {1, 1});
CoreRange single_core({1, 0}, {1, 0});
CoreRange all_cores(start_core, end_core);
CoreRangeSet core_blocks = CoreRangeSet({start_core_range, single_core, core_group});
CoreRangeSet core_blocks = CoreRangeSet(std::vector{start_core_range, single_core, core_group});

uint32_t single_tile_size = 2 * 1024;
int32_t num_tiles = 2048;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ TEST_F(DeviceFixture, CreateMultipleSemaphoresOnSameCore) {

CoreRange core_range({1, 0}, {3, 0});
CoreRangeSet core_range_set({core_range});
CoreRangeSet core_range_set2 = core_range_set.merge({core1});
CoreRangeSet core_range_set2 = core_range_set.merge(std::set<CoreRange>{core1});
std::set<CoreRange> set_of_cores({CoreRange({2,0}, {2,0}), CoreRange({3,0}, {3,0}), CoreRange({5,0}, {5,0})});
CoreRangeSet core_range_set3(set_of_cores);
CoreRangeSet core_range_set4({CoreRange({5,0}, {6,0})});
Expand Down
14 changes: 7 additions & 7 deletions tests/tt_metal/tt_metal/unit_tests/basic/runtime_args.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ TEST_F(DeviceFixture, LegallyModifyRTArgsDataMovement) {
// First run the program with the initial runtime args
CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
CoreRangeSet core_range_set({first_core_range, second_core_range});
CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
auto program = unit_tests::runtime_args::initialize_program_data_movement_rta(this->devices_.at(id), core_range_set, 2);
ASSERT_TRUE(program.num_kernels() == 1);
std::vector<uint32_t> initial_runtime_args = {101, 202};
Expand Down Expand Up @@ -219,7 +219,7 @@ TEST_F(DeviceFixture, LegallyModifyRTArgsCompute) {
// First run the program with the initial runtime args
CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
CoreRangeSet core_range_set({first_core_range, second_core_range});
CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
std::vector<uint32_t> initial_runtime_args = {101, 202};
std::vector<uint32_t> common_runtime_args = {11, 22, 33, 44};
auto program = unit_tests::runtime_args::initialize_program_compute(this->devices_.at(id), core_range_set, initial_runtime_args.size(), common_runtime_args.size());
Expand Down Expand Up @@ -249,7 +249,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsSubsetOfCoresCompute) {
// First run the program with the initial runtime args
CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
CoreRangeSet core_range_set({first_core_range, second_core_range});
CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
std::vector<uint32_t> initial_runtime_args = {101, 202};
std::vector<uint32_t> common_runtime_args = {11, 22, 33, 44};

Expand Down Expand Up @@ -277,7 +277,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsUniqueValuesCompute) {
// First run the program with the initial runtime args
CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
CoreRangeSet core_range_set({first_core_range, second_core_range});
CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
std::vector<uint32_t> common_runtime_args = {11, 22, 33, 44};
auto program = unit_tests::runtime_args::initialize_program_compute(this->devices_.at(id), core_range_set, 2, common_runtime_args.size());

Expand Down Expand Up @@ -311,7 +311,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsVaryingLengthPerCore) {
// First run the program with the initial runtime args
CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
CoreRangeSet core_range_set({first_core_range, second_core_range});
CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
std::vector<uint32_t> common_runtime_args = {11, 22, 33, 44};

// Figure out max number of unique runtime args across all cores, so kernel
Expand Down Expand Up @@ -359,7 +359,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsVaryingLengthPerCore) {
TEST_F(DeviceFixture, IllegalTooManyRuntimeArgs) {
for (unsigned int id = 0; id < num_devices_; id++) {
CoreRange first_core_range(CoreCoord(1, 1), CoreCoord(2, 2));
CoreRangeSet core_range_set({first_core_range});
CoreRangeSet core_range_set(first_core_range);
auto program = unit_tests::runtime_args::initialize_program_compute(this->devices_.at(id), core_range_set, 0, 0); // Kernel isn't run here.

// Set 100 unique args, then try to set 300 common args and fail.
Expand All @@ -381,7 +381,7 @@ TEST_F(DeviceFixture, IllegallyModifyRTArgs) {
// First run the program with the initial runtime args
CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
CoreRangeSet core_range_set({first_core_range, second_core_range});
CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
auto program = unit_tests::runtime_args::initialize_program_data_movement_rta(this->devices_.at(id), core_range_set, 2);
ASSERT_TRUE(program.num_kernels() == 1);
std::vector<uint32_t> initial_runtime_args = {101, 202};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ struct SfpuConfig {
size_t tile_byte_size = 0;
tt::DataFormat l1_input_data_format = tt::DataFormat::Invalid;
tt::DataFormat l1_output_data_format = tt::DataFormat::Invalid;
CoreRangeSet cores = {{}};
CoreRangeSet cores = CoreRangeSet();
std::string sfpu_op = "";
bool approx_mode = true;
};
Expand Down Expand Up @@ -398,7 +398,7 @@ TEST_F(DeviceFixture, DISABLED_AllCoreSingleTileSfpuApproxCompute) {
.tile_byte_size = 2 * 32 * 32,
.l1_input_data_format = tt::DataFormat::Float16_b,
.l1_output_data_format = tt::DataFormat::Float16_b,
.cores = {{}},
.cores = CoreRangeSet(),
.approx_mode = true};

auto arch = this->arch_;
Expand Down Expand Up @@ -437,7 +437,7 @@ TEST_F(DeviceFixture, DISABLED_AllCoreMultiTileSfpuApproxCompute) {
.tile_byte_size = 2 * 32 * 32,
.l1_input_data_format = tt::DataFormat::Float16_b,
.l1_output_data_format = tt::DataFormat::Float16_b,
.cores = {{}},
.cores = CoreRangeSet(),
.approx_mode = true};

auto arch = this->arch_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ namespace basic_tests::CoreRangeSet{

TEST_F(CoreCoordHarness, TestCoreRangeSetValidConstruct)
{
EXPECT_NO_THROW ( ::CoreRangeSet({this->sc1, this->cr2}));
EXPECT_NO_THROW ( ::CoreRangeSet({this->cr1, this->cr2}) );
EXPECT_NO_THROW(::CoreRangeSet(std::vector{this->sc1, this->cr2}));
EXPECT_NO_THROW(::CoreRangeSet(std::vector{this->cr1, this->cr2}));

::CoreRangeSet valid_ranges = ::CoreRangeSet({this->cr1, this->cr2});
::CoreRangeSet valid_ranges = ::CoreRangeSet(std::vector{this->cr1, this->cr2});
EXPECT_EQ(valid_ranges.ranges().size(), 2);
}

TEST_F(CoreCoordHarness, TestCoreRangeSetInvalidConstruct)
{
::CoreRange overlapping_range({1, 2}, {3, 3});
EXPECT_ANY_THROW( ::CoreRangeSet({this->cr1, this->cr2, overlapping_range}) );
EXPECT_ANY_THROW( ::CoreRangeSet({this->sc1, this->cr1}) );
EXPECT_ANY_THROW(::CoreRangeSet(std::vector{this->cr1, this->cr2, overlapping_range}));
EXPECT_ANY_THROW(::CoreRangeSet(std::vector{this->sc1, this->cr1}));
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@ namespace basic_tests::CoreRangeSet{

TEST_F(CoreCoordHarness, TestCoreRangeSetMergeNoSolution)
{
EXPECT_EQ ( ::CoreRangeSet({sc1}).merge({sc3}).ranges() , std::set<::CoreRange>( {sc1,sc3}) );
EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr2}).ranges() , std::set<::CoreRange>( {cr1,cr2}) );
EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr1,cr2}).ranges() , std::set<::CoreRange>( {cr1,cr2}) );
EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr2}).merge({cr3}).ranges() , std::set<::CoreRange>( {cr1,cr2,cr3}) );
EXPECT_EQ(::CoreRangeSet(sc1).merge(std::set{sc3}).ranges(), std::set<::CoreRange>({sc1, sc3}));
EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{cr2}).ranges(), std::set<::CoreRange>({cr1, cr2}));
EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{cr1, cr2}).ranges(), std::set<::CoreRange>({cr1, cr2}));
EXPECT_EQ(
::CoreRangeSet(cr1).merge(std::set{cr2}).merge(std::set{cr3}).ranges(), std::set<::CoreRange>({cr1, cr2, cr3}));
}

TEST_F(CoreCoordHarness, TestCoreRangeSetMergeCoreCoord)
{
::CoreRangeSet empty_crs({});
EXPECT_EQ ( empty_crs.merge({this->sc1}).ranges().size(), 1);
EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({sc3, sc4}).ranges() , std::set<::CoreRange>( {cr16}) );
EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({sc3}).merge({sc4}).ranges() , std::set<::CoreRange>( {cr16}) );
::CoreRangeSet empty_crs;
EXPECT_EQ(empty_crs.merge(std::set{this->sc1}).ranges().size(), 1);
EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{sc3, sc4}).ranges(), std::set<::CoreRange>({cr16}));
EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{sc3}).merge(std::set{sc4}).ranges(), std::set<::CoreRange>({cr16}));
CoreRange rect ( {0,0}, {4,2});
std::set<CoreRange> rect_pts;
for (unsigned y = rect.start_coord.y; y <= rect.end_coord.y; y++){
Expand All @@ -38,18 +39,22 @@ TEST_F(CoreCoordHarness, TestCoreRangeSetMergeCoreCoord)
EXPECT_EQ ( empty_crs.merge(rect_pts).ranges(), std::set<::CoreRange>( {rect, CoreRange( {2,3}, {3,5} ) } ));

// "H", sub-optimal currently, should be reduced down to 3 CRs instead of 5
EXPECT_EQ ( empty_crs.merge( { CoreRange { {0,0}, {1,5} }, CoreRange { {3,0}, {4,5}}, CoreRange { {0,2} , {4,3} } } ).ranges(),
EXPECT_EQ ( empty_crs.merge( std::vector{ CoreRange { {0,0}, {1,5} }, CoreRange { {3,0}, {4,5}}, CoreRange { {0,2} , {4,3} } } ).ranges(),
std::set<::CoreRange>( { CoreRange { {0,0}, {1,1} }, CoreRange { {0,2}, {4,3}}, CoreRange{ {0,4}, {1,5}},
CoreRange { {3,0}, {4,1} }, CoreRange{ {3,4}, {4,5} } } ));
}

TEST_F(CoreCoordHarness, TestCoreRangeSetMergeCoreRange)
{
EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr1}).ranges() , std::set<::CoreRange>( {cr1}) );
EXPECT_EQ ( ::CoreRangeSet({cr7}).merge({cr6}).merge({cr4}).ranges() , std::set<::CoreRange>( {cr8} ) );
EXPECT_EQ ( ::CoreRangeSet({cr8}).merge({cr7}).merge({cr6}).merge({cr4}).ranges() , std::set<::CoreRange>( {cr8} ) );
EXPECT_EQ ( ::CoreRangeSet({cr1, cr2, cr3}).merge({cr4}).ranges() , std::set<::CoreRange>( {cr4}) );
EXPECT_EQ ( ::CoreRangeSet({cr1, cr2}).merge({cr4}).merge({cr6}).ranges() , std::set<::CoreRange>( {cr6}) );
EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{cr1}).ranges(), std::set<::CoreRange>({cr1}));
EXPECT_EQ(::CoreRangeSet(cr7).merge(std::set{cr6}).merge(std::set{cr4}).ranges(), std::set<::CoreRange>({cr8}));
EXPECT_EQ(
::CoreRangeSet(cr8).merge(std::set{cr7}).merge(std::set{cr6}).merge(std::set{cr4}).ranges(),
std::set<::CoreRange>({cr8}));
EXPECT_EQ(::CoreRangeSet(std::vector{cr1, cr2, cr3}).merge(std::set{cr4}).ranges(), std::set<::CoreRange>({cr4}));
EXPECT_EQ(
::CoreRangeSet(std::vector{cr1, cr2}).merge(std::set{cr4}).merge(std::set{cr6}).ranges(),
std::set<::CoreRange>({cr6}));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ struct TestBufferConfig {
tt::tt_metal::BufferType buftype;
};

inline pair<tt::tt_metal::Buffer, std::vector<uint32_t>> EnqueueWriteBuffer_prior_to_wrap(tt::tt_metal::Device* device, tt::tt_metal::CommandQueue& cq, const TestBufferConfig& config) {
inline std::pair<tt::tt_metal::Buffer, std::vector<uint32_t>> EnqueueWriteBuffer_prior_to_wrap(tt::tt_metal::Device* device, tt::tt_metal::CommandQueue& cq, const TestBufferConfig& config) {
// This function just enqueues a buffer (which should be large in the config)
// write as a precursor to testing the wrap mechanism
size_t buf_size = config.num_pages * config.page_size;
Expand Down
Loading

0 comments on commit 1478321

Please sign in to comment.