diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/8_dram_adjacent_core_read/test_dram_read.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/8_dram_adjacent_core_read/test_dram_read.cpp index facfd0ab019..773380ebee9 100644 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/8_dram_adjacent_core_read/test_dram_read.cpp +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/8_dram_adjacent_core_read/test_dram_read.cpp @@ -632,7 +632,7 @@ int main(int argc, char **argv) { uint32_t num_cores = num_banks; // number of DRAM banks // uint32_t num_banks_all = 12; - CoreRangeSet all_cores = CoreRangeSet{{}}; + CoreRangeSet all_cores; std::vector all_cores_list; if (device->arch() == tt::ARCH::WORMHOLE_B0) { get_dram_reader_core_coords_wormhole_b0(device, all_cores, all_cores_list); diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/test_dram_read_l1_write.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/test_dram_read_l1_write.cpp index a3d62706327..814b28abe02 100644 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/test_dram_read_l1_write.cpp +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/test_dram_read_l1_write.cpp @@ -815,9 +815,9 @@ int main(int argc, char **argv) { uint32_t num_tiles = static_cast((input_size + single_tile_size - 1) / single_tile_size); uint32_t num_cores = num_banks; // number of DRAM banks - CoreRangeSet all_dram_reader_cores = CoreRangeSet{{}}; + CoreRangeSet all_dram_reader_cores; std::vector all_dram_reader_cores_ordered; - CoreRangeSet all_l1_receiver_cores = CoreRangeSet{{}}; + CoreRangeSet all_l1_receiver_cores; std::vector all_l1_writer_cores_ordered; if (device->arch() == tt::ARCH::BLACKHOLE) { get_dram_reader_core_coords_blackhole(device, all_dram_reader_cores, all_dram_reader_cores_ordered); diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency.cpp index 1128d5d7809..85b15175215 100644 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency.cpp +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency.cpp @@ -263,7 +263,7 @@ int main(int argc, char **argv) { {"MCAST_NOC_END_ADDR_Y", std::to_string(mcast_noc_addr_end_y)} }; if (!page_size_as_runtime_arg_g) { - defines.insert(pair("PAGE_SIZE", std::to_string(page_size_g))); + defines.insert(std::pair("PAGE_SIZE", std::to_string(page_size_g))); } tt_metal::CircularBufferConfig cb_config = tt_metal::CircularBufferConfig(page_size_g * page_count_g, {{0, tt::DataFormat::Float32}}) diff --git a/tests/tt_metal/tt_metal/test_core_range_set.cpp b/tests/tt_metal/tt_metal/test_core_range_set.cpp index 0c6cbb21ff0..d40d2516128 100644 --- a/tests/tt_metal/tt_metal/test_core_range_set.cpp +++ b/tests/tt_metal/tt_metal/test_core_range_set.cpp @@ -223,7 +223,7 @@ int main(int argc, char **argv) { tt_metal::Program program = tt_metal::CreateProgram(); CoreRange core_range_one({0, 0}, {1, 1}); CoreRange core_range_two({2, 2}, {3, 3}); - CoreRangeSet core_ranges = CoreRangeSet({core_range_one, core_range_two}); + CoreRangeSet core_ranges = CoreRangeSet(std::vector{core_range_one, core_range_two}); pass &= test_program_specified_with_core_range_set(device, program, core_ranges); diff --git a/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp b/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp index 601ce80f696..d8b5c73f90c 100644 --- a/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp +++ b/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp @@ -251,7 +251,7 @@ bool test_multi_core_kernel_unique_runtime_args(tt_metal::Device *device) { CoreRange core_group({0, 1}, {1, 1}); CoreRange single_core({1, 0}, {1, 0}); CoreRange all_cores(start_core, end_core); - CoreRangeSet core_blocks = CoreRangeSet({start_core_range, single_core, core_group}); + CoreRangeSet core_blocks = CoreRangeSet(std::vector{start_core_range, single_core, core_group}); uint32_t single_tile_size = 2 * 1024; int32_t num_tiles = 2048; diff --git a/tests/tt_metal/tt_metal/unit_tests/basic/initialize_semaphores.cpp b/tests/tt_metal/tt_metal/unit_tests/basic/initialize_semaphores.cpp index 0865725e13f..d43254f7c37 100644 --- a/tests/tt_metal/tt_metal/unit_tests/basic/initialize_semaphores.cpp +++ b/tests/tt_metal/tt_metal/unit_tests/basic/initialize_semaphores.cpp @@ -132,7 +132,7 @@ TEST_F(DeviceFixture, CreateMultipleSemaphoresOnSameCore) { CoreRange core_range({1, 0}, {3, 0}); CoreRangeSet core_range_set({core_range}); - CoreRangeSet core_range_set2 = core_range_set.merge({core1}); + CoreRangeSet core_range_set2 = core_range_set.merge(std::set{core1}); std::set set_of_cores({CoreRange({2,0}, {2,0}), CoreRange({3,0}, {3,0}), CoreRange({5,0}, {5,0})}); CoreRangeSet core_range_set3(set_of_cores); CoreRangeSet core_range_set4({CoreRange({5,0}, {6,0})}); diff --git a/tests/tt_metal/tt_metal/unit_tests/basic/runtime_args.cpp b/tests/tt_metal/tt_metal/unit_tests/basic/runtime_args.cpp index ec54d27d5d6..520d04986d2 100644 --- a/tests/tt_metal/tt_metal/unit_tests/basic/runtime_args.cpp +++ b/tests/tt_metal/tt_metal/unit_tests/basic/runtime_args.cpp @@ -173,7 +173,7 @@ TEST_F(DeviceFixture, LegallyModifyRTArgsDataMovement) { // First run the program with the initial runtime args CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1)); CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5)); - CoreRangeSet core_range_set({first_core_range, second_core_range}); + CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range}); auto program = unit_tests::runtime_args::initialize_program_data_movement_rta(this->devices_.at(id), core_range_set, 2); ASSERT_TRUE(program.num_kernels() == 1); std::vector initial_runtime_args = {101, 202}; @@ -219,7 +219,7 @@ TEST_F(DeviceFixture, LegallyModifyRTArgsCompute) { // First run the program with the initial runtime args CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1)); CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5)); - CoreRangeSet core_range_set({first_core_range, second_core_range}); + CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range}); std::vector initial_runtime_args = {101, 202}; std::vector common_runtime_args = {11, 22, 33, 44}; auto program = unit_tests::runtime_args::initialize_program_compute(this->devices_.at(id), core_range_set, initial_runtime_args.size(), common_runtime_args.size()); @@ -249,7 +249,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsSubsetOfCoresCompute) { // First run the program with the initial runtime args CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1)); CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5)); - CoreRangeSet core_range_set({first_core_range, second_core_range}); + CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range}); std::vector initial_runtime_args = {101, 202}; std::vector common_runtime_args = {11, 22, 33, 44}; @@ -277,7 +277,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsUniqueValuesCompute) { // First run the program with the initial runtime args CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1)); CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5)); - CoreRangeSet core_range_set({first_core_range, second_core_range}); + CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range}); std::vector common_runtime_args = {11, 22, 33, 44}; auto program = unit_tests::runtime_args::initialize_program_compute(this->devices_.at(id), core_range_set, 2, common_runtime_args.size()); @@ -311,7 +311,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsVaryingLengthPerCore) { // First run the program with the initial runtime args CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1)); CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5)); - CoreRangeSet core_range_set({first_core_range, second_core_range}); + CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range}); std::vector common_runtime_args = {11, 22, 33, 44}; // Figure out max number of unique runtime args across all cores, so kernel @@ -359,7 +359,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsVaryingLengthPerCore) { TEST_F(DeviceFixture, IllegalTooManyRuntimeArgs) { for (unsigned int id = 0; id < num_devices_; id++) { CoreRange first_core_range(CoreCoord(1, 1), CoreCoord(2, 2)); - CoreRangeSet core_range_set({first_core_range}); + CoreRangeSet core_range_set(first_core_range); auto program = unit_tests::runtime_args::initialize_program_compute(this->devices_.at(id), core_range_set, 0, 0); // Kernel isn't run here. // Set 100 unique args, then try to set 300 common args and fail. @@ -381,7 +381,7 @@ TEST_F(DeviceFixture, IllegallyModifyRTArgs) { // First run the program with the initial runtime args CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1)); CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5)); - CoreRangeSet core_range_set({first_core_range, second_core_range}); + CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range}); auto program = unit_tests::runtime_args::initialize_program_data_movement_rta(this->devices_.at(id), core_range_set, 2); ASSERT_TRUE(program.num_kernels() == 1); std::vector initial_runtime_args = {101, 202}; diff --git a/tests/tt_metal/tt_metal/unit_tests/compute/test_sfpu_compute.cpp b/tests/tt_metal/tt_metal/unit_tests/compute/test_sfpu_compute.cpp index 532276b9e39..56a9dbc37e0 100644 --- a/tests/tt_metal/tt_metal/unit_tests/compute/test_sfpu_compute.cpp +++ b/tests/tt_metal/tt_metal/unit_tests/compute/test_sfpu_compute.cpp @@ -105,7 +105,7 @@ struct SfpuConfig { size_t tile_byte_size = 0; tt::DataFormat l1_input_data_format = tt::DataFormat::Invalid; tt::DataFormat l1_output_data_format = tt::DataFormat::Invalid; - CoreRangeSet cores = {{}}; + CoreRangeSet cores = CoreRangeSet(); std::string sfpu_op = ""; bool approx_mode = true; }; @@ -398,7 +398,7 @@ TEST_F(DeviceFixture, DISABLED_AllCoreSingleTileSfpuApproxCompute) { .tile_byte_size = 2 * 32 * 32, .l1_input_data_format = tt::DataFormat::Float16_b, .l1_output_data_format = tt::DataFormat::Float16_b, - .cores = {{}}, + .cores = CoreRangeSet(), .approx_mode = true}; auto arch = this->arch_; @@ -437,7 +437,7 @@ TEST_F(DeviceFixture, DISABLED_AllCoreMultiTileSfpuApproxCompute) { .tile_byte_size = 2 * 32 * 32, .l1_input_data_format = tt::DataFormat::Float16_b, .l1_output_data_format = tt::DataFormat::Float16_b, - .cores = {{}}, + .cores = CoreRangeSet(), .approx_mode = true}; auto arch = this->arch_; diff --git a/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_construct.cpp b/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_construct.cpp index c1ba749cab4..ffd63eb3842 100644 --- a/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_construct.cpp +++ b/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_construct.cpp @@ -10,18 +10,18 @@ namespace basic_tests::CoreRangeSet{ TEST_F(CoreCoordHarness, TestCoreRangeSetValidConstruct) { - EXPECT_NO_THROW ( ::CoreRangeSet({this->sc1, this->cr2})); - EXPECT_NO_THROW ( ::CoreRangeSet({this->cr1, this->cr2}) ); + EXPECT_NO_THROW(::CoreRangeSet(std::vector{this->sc1, this->cr2})); + EXPECT_NO_THROW(::CoreRangeSet(std::vector{this->cr1, this->cr2})); - ::CoreRangeSet valid_ranges = ::CoreRangeSet({this->cr1, this->cr2}); + ::CoreRangeSet valid_ranges = ::CoreRangeSet(std::vector{this->cr1, this->cr2}); EXPECT_EQ(valid_ranges.ranges().size(), 2); } TEST_F(CoreCoordHarness, TestCoreRangeSetInvalidConstruct) { ::CoreRange overlapping_range({1, 2}, {3, 3}); - EXPECT_ANY_THROW( ::CoreRangeSet({this->cr1, this->cr2, overlapping_range}) ); - EXPECT_ANY_THROW( ::CoreRangeSet({this->sc1, this->cr1}) ); + EXPECT_ANY_THROW(::CoreRangeSet(std::vector{this->cr1, this->cr2, overlapping_range})); + EXPECT_ANY_THROW(::CoreRangeSet(std::vector{this->sc1, this->cr1})); } diff --git a/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_merge.cpp b/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_merge.cpp index 32c0092741f..36822e744f3 100644 --- a/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_merge.cpp +++ b/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_merge.cpp @@ -12,18 +12,19 @@ namespace basic_tests::CoreRangeSet{ TEST_F(CoreCoordHarness, TestCoreRangeSetMergeNoSolution) { - EXPECT_EQ ( ::CoreRangeSet({sc1}).merge({sc3}).ranges() , std::set<::CoreRange>( {sc1,sc3}) ); - EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr2}).ranges() , std::set<::CoreRange>( {cr1,cr2}) ); - EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr1,cr2}).ranges() , std::set<::CoreRange>( {cr1,cr2}) ); - EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr2}).merge({cr3}).ranges() , std::set<::CoreRange>( {cr1,cr2,cr3}) ); + EXPECT_EQ(::CoreRangeSet(sc1).merge(std::set{sc3}).ranges(), std::set<::CoreRange>({sc1, sc3})); + EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{cr2}).ranges(), std::set<::CoreRange>({cr1, cr2})); + EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{cr1, cr2}).ranges(), std::set<::CoreRange>({cr1, cr2})); + EXPECT_EQ( + ::CoreRangeSet(cr1).merge(std::set{cr2}).merge(std::set{cr3}).ranges(), std::set<::CoreRange>({cr1, cr2, cr3})); } TEST_F(CoreCoordHarness, TestCoreRangeSetMergeCoreCoord) { - ::CoreRangeSet empty_crs({}); - EXPECT_EQ ( empty_crs.merge({this->sc1}).ranges().size(), 1); - EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({sc3, sc4}).ranges() , std::set<::CoreRange>( {cr16}) ); - EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({sc3}).merge({sc4}).ranges() , std::set<::CoreRange>( {cr16}) ); + ::CoreRangeSet empty_crs; + EXPECT_EQ(empty_crs.merge(std::set{this->sc1}).ranges().size(), 1); + EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{sc3, sc4}).ranges(), std::set<::CoreRange>({cr16})); + EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{sc3}).merge(std::set{sc4}).ranges(), std::set<::CoreRange>({cr16})); CoreRange rect ( {0,0}, {4,2}); std::set rect_pts; for (unsigned y = rect.start_coord.y; y <= rect.end_coord.y; y++){ @@ -38,18 +39,22 @@ TEST_F(CoreCoordHarness, TestCoreRangeSetMergeCoreCoord) EXPECT_EQ ( empty_crs.merge(rect_pts).ranges(), std::set<::CoreRange>( {rect, CoreRange( {2,3}, {3,5} ) } )); // "H", sub-optimal currently, should be reduced down to 3 CRs instead of 5 - EXPECT_EQ ( empty_crs.merge( { CoreRange { {0,0}, {1,5} }, CoreRange { {3,0}, {4,5}}, CoreRange { {0,2} , {4,3} } } ).ranges(), + EXPECT_EQ ( empty_crs.merge( std::vector{ CoreRange { {0,0}, {1,5} }, CoreRange { {3,0}, {4,5}}, CoreRange { {0,2} , {4,3} } } ).ranges(), std::set<::CoreRange>( { CoreRange { {0,0}, {1,1} }, CoreRange { {0,2}, {4,3}}, CoreRange{ {0,4}, {1,5}}, CoreRange { {3,0}, {4,1} }, CoreRange{ {3,4}, {4,5} } } )); } TEST_F(CoreCoordHarness, TestCoreRangeSetMergeCoreRange) { - EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr1}).ranges() , std::set<::CoreRange>( {cr1}) ); - EXPECT_EQ ( ::CoreRangeSet({cr7}).merge({cr6}).merge({cr4}).ranges() , std::set<::CoreRange>( {cr8} ) ); - EXPECT_EQ ( ::CoreRangeSet({cr8}).merge({cr7}).merge({cr6}).merge({cr4}).ranges() , std::set<::CoreRange>( {cr8} ) ); - EXPECT_EQ ( ::CoreRangeSet({cr1, cr2, cr3}).merge({cr4}).ranges() , std::set<::CoreRange>( {cr4}) ); - EXPECT_EQ ( ::CoreRangeSet({cr1, cr2}).merge({cr4}).merge({cr6}).ranges() , std::set<::CoreRange>( {cr6}) ); + EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{cr1}).ranges(), std::set<::CoreRange>({cr1})); + EXPECT_EQ(::CoreRangeSet(cr7).merge(std::set{cr6}).merge(std::set{cr4}).ranges(), std::set<::CoreRange>({cr8})); + EXPECT_EQ( + ::CoreRangeSet(cr8).merge(std::set{cr7}).merge(std::set{cr6}).merge(std::set{cr4}).ranges(), + std::set<::CoreRange>({cr8})); + EXPECT_EQ(::CoreRangeSet(std::vector{cr1, cr2, cr3}).merge(std::set{cr4}).ranges(), std::set<::CoreRange>({cr4})); + EXPECT_EQ( + ::CoreRangeSet(std::vector{cr1, cr2}).merge(std::set{cr4}).merge(std::set{cr6}).ranges(), + std::set<::CoreRange>({cr6})); } } diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp index 2223519bc63..3b1a12c88ba 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp @@ -12,7 +12,7 @@ struct TestBufferConfig { tt::tt_metal::BufferType buftype; }; -inline pair> EnqueueWriteBuffer_prior_to_wrap(tt::tt_metal::Device* device, tt::tt_metal::CommandQueue& cq, const TestBufferConfig& config) { +inline std::pair> EnqueueWriteBuffer_prior_to_wrap(tt::tt_metal::Device* device, tt::tt_metal::CommandQueue& cq, const TestBufferConfig& config) { // This function just enqueues a buffer (which should be large in the config) // write as a precursor to testing the wrap mechanism size_t buf_size = config.num_pages * config.page_size; diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp index 3194e16e35c..461f07c2825 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp @@ -980,7 +980,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllCbConfigsCorrectlySentMultipleCoreR CoreCoord worker_grid_size = device->compute_with_storage_grid_size(); CoreRange cr1({worker_grid_size.x - 2, worker_grid_size.y - 2}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet core_ranges({cr0, cr1}); + CoreRangeSet core_ranges(std::vector{cr0, cr1}); DummyProgramMultiCBConfig config = {.cr_set = core_ranges, .cb_config_vector = cb_config_vector}; @@ -1001,7 +1001,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllCbConfigsCorrectlySentUpdateSizeMul CoreCoord worker_grid_size = device->compute_with_storage_grid_size(); CoreRange cr1({worker_grid_size.x - 2, worker_grid_size.y - 2}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet core_ranges({cr0, cr1}); + CoreRangeSet core_ranges(std::vector{cr0, cr1}); DummyProgramMultiCBConfig config = {.cr_set = core_ranges, .cb_config_vector = cb_config_vector}; @@ -1023,7 +1023,7 @@ TEST_F(CommandQueueSingleCardFixture, TestMultiCbConfigsCorrectlySentUpdateSizeM CoreCoord worker_grid_size = device->compute_with_storage_grid_size(); CoreRange cr1({worker_grid_size.x - 2, worker_grid_size.y - 2}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet core_ranges({cr0, cr1}); + CoreRangeSet core_ranges(std::vector{cr0, cr1}); DummyProgramMultiCBConfig config = {.cr_set = core_ranges, .cb_config_vector = cb_config_vector}; @@ -1036,7 +1036,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllSemConfigsCorrectlySentMultiCore) { CoreCoord worker_grid_size = device->compute_with_storage_grid_size(); CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet cr_set({cr}); + CoreRangeSet cr_set(cr); DummyProgramConfig config = {.cr_set = cr_set, .num_sems = NUM_SEMAPHORES}; @@ -1052,7 +1052,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllSemaphoreConfigsCorrectlySentMultip CoreCoord worker_grid_size = device->compute_with_storage_grid_size(); CoreRange second_cr({worker_grid_size.x - 2, worker_grid_size.y - 2}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet cr_set({first_cr, second_cr}); + CoreRangeSet cr_set(std::vector{first_cr, second_cr}); Program program; DummyProgramConfig config = {.cr_set = cr_set, .num_sems = NUM_SEMAPHORES}; @@ -1089,7 +1089,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllRuntimeArgsCorrectlySentMultiCore) CoreCoord worker_grid_size = device->compute_with_storage_grid_size(); CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet cr_set({cr}); + CoreRangeSet cr_set(cr); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args(device, device->command_queue(), dummy_program_config, 13, 17, 19, 1)); @@ -1101,7 +1101,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllRuntimeArgsCorrectlySentMultiCore_2 CoreCoord worker_grid_size = device->compute_with_storage_grid_size(); CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet cr_set({cr}); + CoreRangeSet cr_set(cr); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args(device, device->command_queue(), dummy_program_config, 255, 255, 255, 1)); @@ -1114,7 +1114,7 @@ TEST_F(CommandQueueSingleCardFixture, TestSendRuntimeArgsMultiCoreRange) { CoreRange cr0({0, 0}, {worker_grid_size.x - 1, 3}); CoreRange cr1({0, 4}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet cr_set({cr0, cr1}); + CoreRangeSet cr_set(std::vector{cr0, cr1}); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args_multi_crs( @@ -1129,7 +1129,7 @@ TEST_F(CommandQueueSingleCardFixture, TestSendRuntimeArgsMultiNonOverlappingCore CoreRange cr0({0, 0}, {worker_grid_size.x - 1, 3}); CoreRange cr1({0, 5}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet cr_set({cr0, cr1}); + CoreRangeSet cr_set(std::vector{cr0, cr1}); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args_multi_crs( @@ -1143,7 +1143,7 @@ TEST_F(CommandQueueSingleCardFixture, TestUpdateRuntimeArgsMultiCoreRange) { CoreRange cr0({0, 0}, {worker_grid_size.x - 1, 3}); CoreRange cr1({0, 5}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet cr_set({cr0, cr1}); + CoreRangeSet cr_set(std::vector{cr0, cr1}); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args_multi_crs( @@ -1155,7 +1155,7 @@ TEST_F(CommandQueueSingleCardFixture, TestUpdateRuntimeArgsMultiCoreRange) { TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute) { CoreRange cr0({1, 1}, {2, 2}); CoreRange cr1({3, 3}, {4, 4}); - CoreRangeSet cr_set({cr0, cr1}); + CoreRangeSet cr_set(std::vector{cr0, cr1}); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; for (Device *device : devices_) { EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 16, 16, tt::RISCV::COMPUTE)); @@ -1166,7 +1166,7 @@ TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute_255_UniqueArgs) { CoreRange cr0({1, 1}, {2, 2}); CoreRange cr1({3, 3}, {4, 4}); - CoreRangeSet cr_set({cr0, cr1}); + CoreRangeSet cr_set(std::vector{cr0, cr1}); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; for (Device *device : devices_) { EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 255, 0, tt::RISCV::COMPUTE)); @@ -1177,7 +1177,7 @@ TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute_255_CommonArgs) { CoreRange cr0({1, 1}, {2, 2}); CoreRange cr1({3, 3}, {4, 4}); - CoreRangeSet cr_set({cr0, cr1}); + CoreRangeSet cr_set(std::vector{cr0, cr1}); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; for (Device *device : devices_) { EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 0, 255, tt::RISCV::COMPUTE)); @@ -1188,7 +1188,7 @@ TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreDataMovementBrisc) { CoreRange cr0({1, 1}, {2, 2}); CoreRange cr1({3, 3}, {4, 4}); - CoreRangeSet cr_set({cr0, cr1}); + CoreRangeSet cr_set(std::vector{cr0, cr1}); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; for (Device *device : devices_) { EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 16, 16, tt::RISCV::BRISC)); @@ -1199,7 +1199,7 @@ TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreDataMov TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreDataMovementNcrisc) { CoreRange cr0({1, 1}, {2, 2}); CoreRange cr1({3, 3}, {4, 4}); - CoreRangeSet cr_set({cr0, cr1}); + CoreRangeSet cr_set(std::vector{cr0, cr1}); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; for (Device *device : devices_) { EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 16, 16, tt::RISCV::NCRISC)); @@ -1219,7 +1219,7 @@ TEST_F(CommandQueueSingleCardFixture, DISABLED_TestFillDispatchCoreBuffer) { CoreCoord worker_grid_size = device->compute_with_storage_grid_size(); CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet cr_set({cr}); + CoreRangeSet cr_set(cr); DummyProgramConfig dummy_program_config = {.cr_set = cr_set}; @@ -1240,7 +1240,7 @@ TEST_F(CommandQueueFixture, TestRandomizedProgram) { CoreCoord worker_grid_size = this->device_->compute_with_storage_grid_size(); CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1}); - CoreRangeSet cr_set({cr}); + CoreRangeSet cr_set(cr); log_info(tt::LogTest, "Starting compile of {} programs now.", NUM_PROGRAMS); diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/compute/sfpu/sfpu_compute.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/compute/sfpu/sfpu_compute.cpp index 1e6e8c54362..8cb072266de 100644 --- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/compute/sfpu/sfpu_compute.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/compute/sfpu/sfpu_compute.cpp @@ -104,7 +104,7 @@ struct SfpuConfig { size_t tile_byte_size = 0; tt::DataFormat l1_input_data_format = tt::DataFormat::Invalid; tt::DataFormat l1_output_data_format = tt::DataFormat::Invalid; - CoreRangeSet cores = {{}}; + CoreRangeSet cores = CoreRangeSet(); std::string sfpu_op = ""; bool approx_mode = true; }; diff --git a/tt_metal/common/CMakeLists.txt b/tt_metal/common/CMakeLists.txt index 75433fda8c8..294d5700810 100644 --- a/tt_metal/common/CMakeLists.txt +++ b/tt_metal/common/CMakeLists.txt @@ -1,5 +1,6 @@ set(COMMON_SRCS + ${CMAKE_CURRENT_SOURCE_DIR}/core_coord.cpp ${CMAKE_CURRENT_SOURCE_DIR}/core_descriptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metal_soc_descriptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tt_backend_api_types.cpp diff --git a/tt_metal/common/core_coord.cpp b/tt_metal/common/core_coord.cpp new file mode 100644 index 00000000000..3815f2ca7e1 --- /dev/null +++ b/tt_metal/common/core_coord.cpp @@ -0,0 +1,525 @@ +// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "tt_metal/common/core_coord.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "third_party/umd/device/tt_xy_pair.h" +#include "tt_metal/common/assert.hpp" +#include "tt_metal/third_party/tracy/public/tracy/Tracy.hpp" +#include "tt_metal/tt_stl/reflection.hpp" +#include "tt_metal/tt_stl/span.hpp" + +auto fmt::formatter::format(const CoreCoord &core_coord, format_context &ctx) const + -> format_context::iterator { + std::stringstream ss; + ss << core_coord.str(); + return fmt::format_to(ctx.out(), "{}", ss.str()); +} + +std::string RelativeCoreCoord::str() const { return "(x=" + std::to_string(x) + ",y=" + std::to_string(y) + ")"; } + +CoreCoord get_core_coord_from_relative(const RelativeCoreCoord &in, const CoreCoord &grid_size) { + CoreCoord coord; + coord.x = in.x + ((in.x < 0) ? grid_size.x : 0); + coord.y = in.y + ((in.y < 0) ? grid_size.y : 0); + return coord; +} + +CoreRange::CoreRange(const CoreCoord &point) : start_coord(point), end_coord(point) {} + +CoreRange::CoreRange(const CoreCoord &start_coord, const CoreCoord &end_coord) { + TT_FATAL( + end_coord.x >= start_coord.x and end_coord.y >= start_coord.y, + "Invalid core range for start_coord: {}, end_coord: {}", + start_coord.str(), + end_coord.str()); + + this->start_coord = start_coord; + this->end_coord = end_coord; +} + +std::optional CoreRange::intersects(const CoreRange &other) const { + std::size_t x1 = std::max(this->start_coord.x, other.start_coord.x); + std::size_t y1 = std::max(this->start_coord.y, other.start_coord.y); + std::size_t x2 = std::min(this->end_coord.x, other.end_coord.x); + std::size_t y2 = std::min(this->end_coord.y, other.end_coord.y); + if (x1 <= x2 and y1 <= y2) { + return CoreRange({x1, y1}, {x2, y2}); + } + + return {}; +} + +bool CoreRange::adjacent(const CoreRange &other) const { + std::size_t x1 = std::max(this->start_coord.x, other.start_coord.x); + std::size_t y1 = std::max(this->start_coord.y, other.start_coord.y); + std::size_t x2 = std::min(this->end_coord.x, other.end_coord.x); + std::size_t y2 = std::min(this->end_coord.y, other.end_coord.y); + return ((x2 + 1 == x1 && y1 <= y2) || (y2 + 1 == y1 && x1 <= x2)); +} + +bool CoreRange::contains(const CoreRange &other) const { + return (other.start_coord.x >= this->start_coord.x) && (other.end_coord.x <= this->end_coord.x) && + (other.start_coord.y >= this->start_coord.y) && (other.end_coord.y <= this->end_coord.y); +} + +bool CoreRange::contains(const CoreCoord &other) const { + return (other.x >= this->start_coord.x) && (other.x <= this->end_coord.x) && (other.y >= this->start_coord.y) && + (other.y <= this->end_coord.y); +} + +// Merge lined-up (in x or y dimension) intersecting/adjacent rectangles +std::optional CoreRange::merge(const CoreRange &cr) const { + if (this->intersects(cr) || this->adjacent(cr)) { + if (this->start_coord.x == cr.start_coord.x && this->end_coord.x == cr.end_coord.x) + return CoreRange( + {this->start_coord.x, std::min(this->start_coord.y, cr.start_coord.y)}, + {this->end_coord.x, std::max(this->end_coord.y, cr.end_coord.y)}); + + else if (this->start_coord.y == cr.start_coord.y && this->end_coord.y == cr.end_coord.y) + return CoreRange( + {std::min(this->start_coord.x, cr.start_coord.x), this->start_coord.y}, + {std::max(this->end_coord.x, cr.end_coord.x), this->end_coord.y}); + } + return std::nullopt; +} + +std::string CoreRange::str() const { return "[" + this->start_coord.str() + " - " + this->end_coord.str() + "]"; } + +size_t CoreRange::size() const { + return (this->end_coord.x - this->start_coord.x + 1) * (this->end_coord.y - this->start_coord.y + 1); +} + +CoreCoord CoreRange::grid_size() const { + return {this->end_coord.x - this->start_coord.x + 1, this->end_coord.y - this->start_coord.y + 1}; +} + +CoreRange::CoreIterator::CoreIterator(const CoreCoord ¤t, const CoreRange &core_range) : + current_(current), range_(core_range) {} + +CoreCoord &CoreRange::CoreIterator::operator*() { return current_; } + +CoreRange::CoreIterator &CoreRange::CoreIterator::operator++() { + CoreCoord next; + + const bool is_curr_core_at_end_of_row = current_.x == range_.end_coord.x; + if (is_curr_core_at_end_of_row) { + // Go to the beginning of the next row + next.x = range_.start_coord.x; + next.y = current_.y + 1; + } else { + next.x = current_.x + 1; + next.y = current_.y; + } + + current_ = next; + return *this; +} + +CoreRange::CoreIterator CoreRange::begin() const { return CoreRange::CoreIterator(this->start_coord, *this); } + +CoreRange::CoreIterator CoreRange::end() const { + const CoreCoord iterator_end(this->start_coord.x, this->end_coord.y + 1); + return CoreRange::CoreIterator(iterator_end, *this); +} + +bool CoreRange::CoreIterator::operator==(const CoreIterator &other) const { return current_ == other.current_; } + +bool CoreRange::CoreIterator::operator!=(const CoreIterator &other) const { return !(current_ == other.current_); } + +auto fmt::formatter::format(const CoreRange &core_range, format_context &ctx) const + -> format_context::iterator { + std::stringstream ss; + ss << core_range.str(); + return fmt::format_to(ctx.out(), "{}", ss.str()); +} + +CoreRangeSet::CoreRangeSet(const std::vector &core_ranges) : + ranges_(core_ranges.begin(), core_ranges.end()) { + ZoneScoped; + this->validate_no_overlap(); +} + +CoreRangeSet::CoreRangeSet(const std::set &core_ranges) : ranges_(core_ranges.begin(), core_ranges.end()) { + ZoneScoped; + this->validate_no_overlap(); +} + +CoreRangeSet::CoreRangeSet(const CoreRange &core_range) : ranges_{core_range} {} + +void swap(CoreRangeSet &first, CoreRangeSet &second) { + std::scoped_lock lock(first.ranges_guard, second.ranges_guard); + std::swap(first.ranges_, second.ranges_); +} + +CoreRangeSet::CoreRangeSet(const CoreRangeSet &other) { + std::scoped_lock lock(other.ranges_guard); + this->ranges_ = other.ranges_; +} + +CoreRangeSet &CoreRangeSet::operator=(const CoreRangeSet &other) { + std::scoped_lock lock(other.ranges_guard); + this->ranges_ = other.ranges_; + return *this; +} + +CoreRangeSet::CoreRangeSet(CoreRangeSet &&other) { swap(*this, other); } + +CoreRangeSet &CoreRangeSet::operator=(CoreRangeSet &&other) { + swap(*this, other); + return *this; +} + +CoreRangeSet::CoreRangeSet(std::vector &&core_ranges) : ranges_(std::move(core_ranges)) { + ZoneScoped; + this->validate_no_overlap(); +} + +size_t CoreRangeSet::size() const { return ranges_.size(); } + +template +CoreRangeSet CoreRangeSet::merge(const T &other) const { + size_t min_x = std::numeric_limits::max(), max_x = 0, min_y = std::numeric_limits::max(), max_y = 0; + std::set crs(this->ranges_.begin(), this->ranges_.end()); + crs.insert(other.begin(), other.end()); + + for (const auto &cr : crs) { + min_x = std::min(min_x, cr.start_coord.x); + max_x = std::max(max_x, cr.end_coord.x); + min_y = std::min(min_y, cr.start_coord.y); + max_y = std::max(max_y, cr.end_coord.y); + } + + // By overallocating by one x entry, we can avoid needing to check for + // boundary conditions when iterating, since there'll always be one + // last false entry + bool grid[max_y + 1][max_x + 2]; + memset(grid, 0, sizeof(grid)); + + for (const auto &cr : crs) + for (unsigned y = cr.start_coord.y; y <= cr.end_coord.y; y++) + for (unsigned x = cr.start_coord.x; x <= cr.end_coord.x; x++) grid[y][x] = true; + + crs.clear(); + for (unsigned y = min_y; y <= max_y; y++) { + std::set filter_set, tmp, new_crs; + std::vector ranges; + for (unsigned x = min_x; x <= max_x + 1; x++) { + if (grid[y][x]) { + unsigned x_start = x; + while (grid[y][x]) x++; + ranges.push_back(CoreRange({x_start, y}, {x - 1, y})); + } + } + + for (const auto &cr : ranges) { + for (const auto &prev_cr : crs) { + if (auto merged = cr.merge(prev_cr)) { + new_crs.insert(merged.value()); + filter_set.insert(prev_cr); + filter_set.insert(cr); + } + } + crs.insert(cr); + } + // Set(A) = Set(A) - Set(B) + std::set_difference( + std::make_move_iterator(crs.begin()), + std::make_move_iterator(crs.end()), + filter_set.begin(), + filter_set.end(), + std::inserter(tmp, tmp.end())); + crs.swap(tmp); + crs.insert(new_crs.begin(), new_crs.end()); + } + return CoreRangeSet(crs); +} + +template CoreRangeSet CoreRangeSet::merge>(const std::vector &other) const; +template CoreRangeSet CoreRangeSet::merge>(const std::set &other) const; + +template <> +CoreRangeSet CoreRangeSet::merge(const CoreRangeSet &other) const { + return this->merge(other.ranges()); +} + +bool CoreRangeSet::core_coord_in_core_ranges(const CoreCoord &core_coord) const { + ZoneScoped; + for (const auto &cr : this->ranges_) { + if (cr.contains(core_coord)) + return true; + } + return false; +} + +bool CoreRangeSet::intersects(const CoreRange &cr) const { + for (const auto &local_cr : this->ranges_) { + if (local_cr.intersects(cr)) + return true; + } + return false; +} + +const std::vector &CoreRangeSet::ranges() const { return this->ranges_; } + +std::string CoreRangeSet::str() const { + if (this->ranges().size() > 0) { + std::string core_range_set_str = "{"; + for (const auto &core_range : this->ranges_) { + core_range_set_str += core_range.str() + ", "; + } + core_range_set_str[core_range_set_str.length() - 2] = '}'; + core_range_set_str.pop_back(); + return core_range_set_str; + } else { + return "{}"; + } +} + +uint32_t CoreRangeSet::num_cores() const { + uint32_t num_cores = 0; + for (const auto &core_range : this->ranges()) { + num_cores += core_range.size(); + } + return num_cores; +} + +CoreRange CoreRangeSet::bounding_box() const { + TT_FATAL(this->ranges().size() > 0, "Cannot get bounding_box of an empty CoreRangeSet!"); + size_t min_x = UINT32_MAX, min_y = UINT32_MAX, max_x = 0, max_y = 0; + for (const auto &cr : this->ranges()) { + min_x = std::min(min_x, cr.start_coord.x); + max_x = std::max(max_x, cr.end_coord.x); + min_y = std::min(min_y, cr.start_coord.y); + max_y = std::max(max_y, cr.end_coord.y); + } + return {{min_x, min_y}, {max_x, max_y}}; +} + +void CoreRangeSet::validate_no_overlap() { + if (this->ranges_.size() < 2) { + return; + } + for (auto outer_it = this->ranges_.begin(); outer_it != this->ranges_.end() - 1; outer_it++) { + for (auto inner_it = outer_it + 1; inner_it != this->ranges_.end(); inner_it++) { + CoreRange &first_core_range = *outer_it; + CoreRange &second_core_range = *inner_it; + bool first_core_left_of_second = first_core_range.end_coord.x < second_core_range.start_coord.x; + bool first_core_right_of_second = first_core_range.start_coord.x > second_core_range.end_coord.x; + bool first_core_above_second = first_core_range.end_coord.y < second_core_range.start_coord.y; + bool first_core_below_second = first_core_range.start_coord.y > second_core_range.end_coord.y; + auto no_overlap = first_core_left_of_second or first_core_right_of_second or first_core_above_second or + first_core_below_second; + if (not no_overlap) { + TT_THROW( + "Cannot create CoreRangeSet with specified core ranges because core ranges {} and {} overlap!", + first_core_range.str(), + second_core_range.str()); + } + } + } +} + +bool operator==(const CoreRangeSet &a, const CoreRangeSet &b) { + if (a.ranges().size() == b.ranges().size()) { + auto range_a = a.ranges(); + auto range_b = b.ranges(); + for (auto it_a = range_a.begin(), it_b = range_b.begin(); it_a != range_a.end(); it_a++, it_b++) { + if (*it_a != *it_b) { + return false; + } + } + return true; + } + return false; +} + +std::vector grid_to_cores(uint32_t num_cores, uint32_t grid_size_x, uint32_t grid_size_y, bool row_wise) { + std::vector cores; + cores.reserve(num_cores); + TT_ASSERT( + num_cores <= grid_size_x * grid_size_y, + "Number of cores {} exceeds grid size {}x{}", + num_cores, + grid_size_x, + grid_size_y); + if (row_wise) { + for (uint32_t i = 0; i < num_cores; ++i) { + cores.push_back({i % grid_size_x, i / grid_size_x}); + } + } else { + for (uint32_t i = 0; i < num_cores; ++i) { + cores.push_back({i / grid_size_y, i % grid_size_y}); + } + } + return cores; +} + +std::vector grid_to_cores(CoreCoord start, CoreCoord end, bool row_wise) { + std::vector cores; + auto num_cores_x = (end.x + 1) - start.x; + auto num_cores_y = (end.y + 1) - start.y; + uint32_t num_cores = num_cores_x * num_cores_y; + cores.reserve(num_cores); + if (row_wise) { + for (uint32_t j = start.y; j < (end.y + 1); j++) { + for (uint32_t i = start.x; i < (end.x + 1); i++) { + cores.push_back({i, j}); + } + } + + } else { + for (uint32_t i = start.x; i < (end.x + 1); i++) { + for (uint32_t j = start.y; j < (end.y + 1); j++) { + cores.push_back({i, j}); + } + } + } + return cores; +} + +// Noop cores are appended at the end with no guarantees on ordering +std::vector grid_to_cores_with_noop( + const uint32_t bbox_x, + const uint32_t bbox_y, + const uint32_t grid_size_x, + const uint32_t grid_size_y, + const bool row_wise) { + ZoneScoped; + std::vector cores; + cores.reserve(grid_size_x * grid_size_y); + TT_ASSERT(bbox_x < grid_size_x); + TT_ASSERT(bbox_y < grid_size_y); + const uint32_t box_size_x = bbox_x + 1; + const uint32_t box_size_y = bbox_y + 1; + + if (row_wise) { + for (uint32_t i = 0; i < box_size_x * box_size_y; ++i) { + cores.push_back({i % box_size_x, i / box_size_x}); + } + } else { + for (uint32_t i = 0; i < box_size_x * box_size_y; ++i) { + cores.push_back({i / box_size_y, i % box_size_y}); + } + } + + // Right rectangle noops + for (uint32_t x = box_size_x; x < grid_size_x; ++x) { + for (uint32_t y = 0; y < grid_size_y; ++y) { + cores.push_back({x, y}); + } + } + + // Bottom rectangle noops + for (uint32_t y = box_size_y; y < grid_size_y; ++y) { + for (uint32_t x = 0; x < box_size_x; ++x) { + cores.push_back({x, y}); + } + } + + return cores; +} + +std::vector corerange_to_cores(const CoreRangeSet &crs, std::optional max_cores, bool row_wise) { + uint32_t num_total_cores = 0; + std::vector all_cores; + uint32_t offset = 0; + + for (auto core_range : crs.ranges()) { + auto start_coord = core_range.start_coord; + auto end_coord = core_range.end_coord; + auto cores = grid_to_cores(start_coord, end_coord, row_wise); + if (max_cores.has_value()) { + if (all_cores.size() + cores.size() > max_cores.value()) { + uint32_t num_cores_to_add = max_cores.value() - all_cores.size(); + all_cores.insert(all_cores.end(), cores.begin(), cores.begin() + num_cores_to_add); + } else { + all_cores.insert(all_cores.end(), cores.begin(), cores.end()); + } + } else { + all_cores.insert(all_cores.end(), cores.begin(), cores.end()); + } + } + + return all_cores; +} + +bool operator!=(const CoreRangeSet &a, const CoreRangeSet &b) { return !(a == b); } + +auto fmt::formatter::format(const CoreRangeSet &core_range_set, format_context &ctx) const + -> format_context::iterator { + std::stringstream ss; + ss << core_range_set.str(); + return fmt::format_to(ctx.out(), "{}", ss.str()); +} + +namespace std { + +std::size_t hash::operator()(RelativeCoreCoord const &o) const { + std::size_t seed = 0; + seed = std::hash()(o.x) ^ std::hash()(o.y) << 1; + return seed; +} + +std::size_t hash::operator()(const CoreRange &core_range) const { + std::size_t seed = 0; + seed = std::hash{}(core_range.start_coord) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed = std::hash{}(core_range.end_coord) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; +} + +std::size_t hash::operator()(const CoreRangeSet &core_range_set) const { + std::size_t seed = 0; + for (const auto &core_range : core_range_set.ranges()) { + seed = std::hash{}(core_range) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; +} + +} // namespace std + +namespace tt::stl::json { + +nlohmann::json to_json_t::operator()(const CoreCoord &core_coord) noexcept { + return {{"x", to_json(core_coord.x)}, {"y", to_json(core_coord.y)}}; +} + +CoreCoord from_json_t::operator()(const nlohmann::json &json) noexcept { + return {from_json(json.at("x")), from_json(json.at("y"))}; +} + +nlohmann::json to_json_t::operator()(const RelativeCoreCoord &relative_core_coord) noexcept { + return {{"x", to_json(relative_core_coord.x)}, {"y", to_json(relative_core_coord.y)}}; +} + +RelativeCoreCoord from_json_t::operator()(const nlohmann::json &json) noexcept { + return {from_json(json.at("x")), from_json(json.at("y"))}; +} + +nlohmann::json to_json_t::operator()(const CoreRange &core_range) noexcept { + return {{"start", to_json(core_range.start_coord)}, {"end", to_json(core_range.end_coord)}}; +} + +CoreRange from_json_t::operator()(const nlohmann::json &json) noexcept { + return {from_json(json.at("start")), from_json(json.at("end"))}; +} + +nlohmann::json to_json_t::operator()(const CoreRangeSet &core_range_set) noexcept { + nlohmann::json core_range_set_json = nlohmann::json::array(); + return to_json(core_range_set.ranges()); +} + +CoreRangeSet from_json_t::operator()(const nlohmann::json &json) noexcept { + return CoreRangeSet(from_json>(json)); +} + +} // namespace tt::stl::json diff --git a/tt_metal/common/core_coord.h b/tt_metal/common/core_coord.h index 448ef85edb1..d5623836152 100644 --- a/tt_metal/common/core_coord.h +++ b/tt_metal/common/core_coord.h @@ -5,31 +5,23 @@ #pragma once #include -#include #include #include #include #include +#include #include "third_party/json/json.hpp" #include "third_party/umd/device/tt_xy_pair.h" -#include "tt_metal/common/assert.hpp" -#include "tt_metal/third_party/tracy/public/tracy/Tracy.hpp" #include "tt_metal/tt_stl/reflection.hpp" -using std::pair; - using CoreCoord = tt_xy_pair; template <> struct fmt::formatter { constexpr auto parse(format_parse_context &ctx) -> format_parse_context::iterator { return ctx.end(); } - auto format(const CoreCoord &core_coord, format_context &ctx) const -> format_context::iterator { - std::stringstream ss; - ss << core_coord.str(); - return fmt::format_to(ctx.out(), "{}", ss.str()); - } + auto format(const CoreCoord &core_coord, format_context &ctx) const -> format_context::iterator; }; constexpr inline bool operator<=(const CoreCoord &a, const CoreCoord &b) { return (a < b) or (a == b); } @@ -38,7 +30,7 @@ struct RelativeCoreCoord { long x = 0; long y = 0; - std::string str() const { return "(x=" + std::to_string(x) + ",y=" + std::to_string(y) + ")"; } + std::string str() const; }; constexpr inline bool operator==(const RelativeCoreCoord &a, const RelativeCoreCoord &b) { @@ -47,590 +39,217 @@ constexpr inline bool operator==(const RelativeCoreCoord &a, const RelativeCoreC constexpr inline bool operator!=(const RelativeCoreCoord &a, const RelativeCoreCoord &b) { return !(a == b); } -namespace std { -template <> -struct hash { - std::size_t operator()(RelativeCoreCoord const &o) const { - std::size_t seed = 0; - seed = std::hash()(o.x) ^ std::hash()(o.y) << 1; - return seed; - } -}; -} // namespace std - -inline CoreCoord get_core_coord_from_relative(const RelativeCoreCoord &in, const CoreCoord &grid_size) { - CoreCoord coord; - coord.x = in.x + ((in.x < 0) ? grid_size.x : 0); - coord.y = in.y + ((in.y < 0) ? grid_size.y : 0); - return coord; -} +CoreCoord get_core_coord_from_relative(const RelativeCoreCoord &in, const CoreCoord &grid_size); struct CoreRange { CoreCoord start_coord; CoreCoord end_coord; - CoreRange(const CoreCoord &point) { - this->start_coord = point; - this->end_coord = point; - } + CoreRange(const CoreCoord &point); - CoreRange(const CoreCoord &start_coord, const CoreCoord &end_coord) { - TT_ASSERT( - end_coord.x >= start_coord.x and end_coord.y >= start_coord.y, - "Invalid core range for start_coord: {}, end_coord: {}", start_coord.str(), end_coord.str()); - - this->start_coord = start_coord; - this->end_coord = end_coord; - } + CoreRange(const CoreCoord &start_coord, const CoreCoord &end_coord); CoreRange(const CoreRange &other) = default; CoreRange &operator=(const CoreRange &other) = default; CoreRange(CoreRange &&other) = default; CoreRange &operator=(CoreRange &&other) = default; - // void validate() { - // TT_FATAL( - // end_coord.x >= start_coord.x and end_coord.y >= start_coord.y, - // "Invalid core range for start_coord: {}, end_coord: {}", start_coord.str(), end_coord.str()); - // } - - inline std::optional intersects(const CoreRange &other) const { - std::size_t x1 = std::max(this->start_coord.x, other.start_coord.x); - std::size_t y1 = std::max(this->start_coord.y, other.start_coord.y); - std::size_t x2 = std::min(this->end_coord.x, other.end_coord.x); - std::size_t y2 = std::min(this->end_coord.y, other.end_coord.y); - if (x1 <= x2 and y1 <= y2) - return CoreRange({x1, y1}, {x2, y2}); - - return {}; - } - - inline bool adjacent(const CoreRange &other) const { - std::size_t x1 = std::max(this->start_coord.x, other.start_coord.x); - std::size_t y1 = std::max(this->start_coord.y, other.start_coord.y); - std::size_t x2 = std::min(this->end_coord.x, other.end_coord.x); - std::size_t y2 = std::min(this->end_coord.y, other.end_coord.y); - return ((x2 + 1 == x1 && y1 <= y2) || (y2 + 1 == y1 && x1 <= x2)); - } - - inline bool contains(const CoreRange &other) const { - return (other.start_coord.x >= this->start_coord.x) && (other.end_coord.x <= this->end_coord.x) && (other.start_coord.y >= this->start_coord.y) && - (other.end_coord.y <= this->end_coord.y); - } - - inline bool contains(const CoreCoord &other) const { - return (other.x >= this->start_coord.x) && (other.x <= this->end_coord.x) && (other.y >= this->start_coord.y) && - (other.y <= this->end_coord.y); - } + std::optional intersects(const CoreRange &other) const; + + bool adjacent(const CoreRange &other) const; + + bool contains(const CoreRange &other) const; + + bool contains(const CoreCoord &other) const; // Merge lined-up (in x or y dimension) intersecting/adjacent rectangles - std::optional merge(const CoreRange &cr) const { - if (this->intersects(cr) || this->adjacent(cr)) { - if (this->start_coord.x == cr.start_coord.x && this->end_coord.x == cr.end_coord.x) - return CoreRange( - {this->start_coord.x, std::min(this->start_coord.y, cr.start_coord.y)}, - {this->end_coord.x, std::max(this->end_coord.y, cr.end_coord.y)}); - - else if (this->start_coord.y == cr.start_coord.y && this->end_coord.y == cr.end_coord.y) - return CoreRange( - {std::min(this->start_coord.x, cr.start_coord.x), this->start_coord.y}, - {std::max(this->end_coord.x, cr.end_coord.x), this->end_coord.y}); - } - return std::nullopt; - } - - std::string str() const { return "[" + this->start_coord.str() + " - " + this->end_coord.str() + "]"; } - - size_t size() const { return (this->end_coord.x - this->start_coord.x + 1) * (this->end_coord.y - this->start_coord.y + 1); } - - CoreCoord grid_size() const { return {this->end_coord.x - this->start_coord.x + 1, this->end_coord.y - this->start_coord.y + 1}; } - - class CoreIterator - { - public: - CoreIterator(const CoreCoord& current, const CoreRange& core_range) : - current_(current), - range_(core_range) - {} - - CoreCoord& operator*() - { - return current_; - } - - CoreIterator& operator++() - { - CoreCoord next; - - const bool is_curr_core_at_end_of_row = current_.x == range_.end_coord.x; - if (is_curr_core_at_end_of_row) - { - // Go to the beginning of the next row - next.x = range_.start_coord.x; - next.y = current_.y + 1; - } - else - { - next.x = current_.x + 1; - next.y = current_.y; - } - - current_ = next; - return *this; - } - - bool operator==(const CoreIterator& other) const - { - return current_ == other.current_; - } - - bool operator!=(const CoreIterator& other) const - { - return !(current_ == other.current_); - } - - private: + std::optional merge(const CoreRange &cr) const; + + std::string str() const; + + size_t size() const; + + CoreCoord grid_size() const; + + class CoreIterator { + public: + CoreIterator(const CoreCoord ¤t, const CoreRange &core_range); + + CoreCoord &operator*(); + + CoreIterator &operator++(); + + bool operator==(const CoreIterator &other) const; + + bool operator!=(const CoreIterator &other) const; + + private: CoreCoord current_; - const CoreRange& range_; + const CoreRange &range_; }; - CoreIterator begin() const - { - return CoreIterator(this->start_coord, *this); - } + CoreIterator begin() const; - CoreIterator end() const - { - const CoreCoord iterator_end(this->start_coord.x, this->end_coord.y + 1); - return CoreIterator(iterator_end, *this); - } + CoreIterator end() const; }; -constexpr inline bool operator==(const CoreRange &a, const CoreRange &b) { +constexpr bool operator==(const CoreRange &a, const CoreRange &b) { return a.start_coord == b.start_coord && a.end_coord == b.end_coord; } -constexpr inline bool operator!=(const CoreRange &a, const CoreRange &b) { return !(a == b); } +constexpr bool operator!=(const CoreRange &a, const CoreRange &b) { return !(a == b); } -constexpr inline bool operator<(const CoreRange &left, const CoreRange &right) { - return (left.start_coord < right.start_coord || (left.start_coord == right.start_coord && left.end_coord < right.end_coord)); +constexpr bool operator<(const CoreRange &left, const CoreRange &right) { + return ( + left.start_coord < right.start_coord || + (left.start_coord == right.start_coord && left.end_coord < right.end_coord)); } template <> struct fmt::formatter { constexpr auto parse(format_parse_context &ctx) -> format_parse_context::iterator { return ctx.end(); } - auto format(const CoreRange &core_range, format_context &ctx) const -> format_context::iterator { - std::stringstream ss; - ss << core_range.str(); - return fmt::format_to(ctx.out(), "{}", ss.str()); - } + auto format(const CoreRange &core_range, format_context &ctx) const -> format_context::iterator; }; -namespace std { -template <> -struct hash { - std::size_t operator()(const CoreRange &core_range) const { - std::size_t seed = 0; - seed = std::hash{}(core_range.start_coord) + 0x9e3779b9 + (seed << 6) + (seed >> 2); - seed = std::hash{}(core_range.end_coord) + 0x9e3779b9 + (seed << 6) + (seed >> 2); - return seed; - } -}; -} // namespace std - class CoreRangeSet { public: - CoreRangeSet(const std::set &core_ranges) : ranges_(core_ranges) { - ZoneScoped; - for (auto outer_it = this->ranges_.begin(); outer_it != this->ranges_.end(); outer_it++) { - for (auto inner_it = this->ranges_.begin(); inner_it != this->ranges_.end(); inner_it++) { - if (outer_it == inner_it) { - continue; - } - CoreRange first_core_range = *outer_it; - CoreRange second_core_range = *inner_it; - bool first_core_left_of_second = first_core_range.end_coord.x < second_core_range.start_coord.x; - bool first_core_right_of_second = first_core_range.start_coord.x > second_core_range.end_coord.x; - bool first_core_above_second = first_core_range.end_coord.y < second_core_range.start_coord.y; - bool first_core_below_second = first_core_range.start_coord.y > second_core_range.end_coord.y; - auto no_overlap = first_core_left_of_second or first_core_right_of_second or first_core_above_second or - first_core_below_second; - if (not no_overlap) { - TT_THROW( - "Cannot create CoreRangeSet with specified core ranges because core ranges {} and {} overlap!", - first_core_range.str(), - second_core_range.str()); - } - } - } - } - - friend void swap(CoreRangeSet& first, CoreRangeSet& second) { - std::scoped_lock lock(first.ranges_guard, second.ranges_guard); - std::swap(first.ranges_, second.ranges_); - } - - CoreRangeSet(const CoreRangeSet &other) { - std::scoped_lock lock(other.ranges_guard); - this->ranges_ = other.ranges_; - } - CoreRangeSet &operator=(const CoreRangeSet &other) { - std::scoped_lock lock(other.ranges_guard); - this->ranges_ = other.ranges_; - return *this; - } - - CoreRangeSet(CoreRangeSet &&other) { - swap(*this, other); - } - - CoreRangeSet &operator=(CoreRangeSet &&other) {; - swap(*this, other); - return *this; - } - - auto size() const { return ranges_.size(); } - - CoreRangeSet merge(const std::set &other) const { - size_t min_x = std::numeric_limits::max(), max_x = 0, min_y = std::numeric_limits::max(), - max_y = 0; - std::set crs = this->ranges_; - crs.insert(other.begin(), other.end()); - - for (const auto &cr : crs) { - // std::cout << "merging " << cr.str() << std::endl; - min_x = std::min(min_x, cr.start_coord.x); - max_x = std::max(max_x, cr.end_coord.x); - min_y = std::min(min_y, cr.start_coord.y); - max_y = std::max(max_y, cr.end_coord.y); - } - - // By overallocating by one x entry, we can avoid needing to check for - // boundary conditions when iterating, since there'll always be one - // last false entry - bool grid[max_y + 1][max_x + 2]; - memset(grid, 0, sizeof(grid)); - - for (const auto &cr : crs) - for (unsigned y = cr.start_coord.y; y <= cr.end_coord.y; y++) - for (unsigned x = cr.start_coord.x; x <= cr.end_coord.x; x++) grid[y][x] = true; - - crs.clear(); - for (unsigned y = min_y; y <= max_y; y++) { - std::set filter_set, tmp, new_crs; - std::vector ranges; - for (unsigned x = min_x; x <= max_x + 1; x++) { - if (grid[y][x]) { - unsigned x_start = x; - while (grid[y][x]) x++; - ranges.push_back(CoreRange({x_start, y}, {x - 1, y})); - } - } - - for (const auto &cr : ranges) { - for (const auto &prev_cr : crs) { - if (auto merged = cr.merge(prev_cr)) { - // std::cout << "merging " << cr.str() << " and " << prev_cr.str() << " with " << - // merged.value().str() << std::endl; - new_crs.insert(merged.value()); - filter_set.insert(prev_cr); - filter_set.insert(cr); - } - } - crs.insert(cr); - } - // Set(A) = Set(A) - Set(B) - std::set_difference( - std::make_move_iterator(crs.begin()), - std::make_move_iterator(crs.end()), - filter_set.begin(), - filter_set.end(), - std::inserter(tmp, tmp.end())); - crs.swap(tmp); - crs.insert(new_crs.begin(), new_crs.end()); - } - // for ( const auto & cr : crs ){ - // std::cout << " final merged CR:" << cr.str() << std::endl; - // } - return CoreRangeSet(crs); - } - - CoreRangeSet merge(const CoreRangeSet &s) const { return this->merge(s.ranges()); } - - inline bool core_coord_in_core_ranges(const CoreCoord &core_coord) const { - ZoneScoped; - for (const auto &cr : this->ranges_) { - if (cr.contains(core_coord)) - return true; - } - return false; - } - - inline bool intersects(const CoreRange &cr) const { - for (const auto &local_cr : this->ranges_) { - if (local_cr.intersects(cr)) - return true; - } - return false; - } - - const std::set &ranges() const { return this->ranges_; } - - std::string str() const { - if (this->ranges().size() > 0) { - std::string core_range_set_str = "{"; - for (const auto &core_range : this->ranges_) { - core_range_set_str += core_range.str() + ", "; - } - core_range_set_str[core_range_set_str.length() - 2] = '}'; - core_range_set_str.pop_back(); - return core_range_set_str; - } else { - return "{}"; - } - } - - const uint32_t num_cores() const { - uint32_t num_cores = 0; - for (const auto &core_range : this->ranges()) { - num_cores += core_range.size(); - } - return num_cores; - } - - CoreRange bounding_box() const { - TT_FATAL(this->ranges().size() > 0, "Cannot get bounding_box of an empty CoreRangeSet!"); - size_t min_x = UINT32_MAX, min_y = UINT32_MAX, max_x = 0, max_y = 0; - for (const auto &cr : this->ranges()) { - min_x = std::min(min_x, cr.start_coord.x); - max_x = std::max(max_x, cr.end_coord.x); - min_y = std::min(min_y, cr.start_coord.y); - max_y = std::max(max_y, cr.end_coord.y); - } - return {{min_x, min_y}, {max_x, max_y}}; - } - - private: - mutable std::mutex ranges_guard; - std::set ranges_; + CoreRangeSet(const std::vector &core_ranges); + + CoreRangeSet(const std::set &core_ranges); + + CoreRangeSet(const CoreRange &core_range); + + CoreRangeSet() = default; + + friend void swap(CoreRangeSet &first, CoreRangeSet &second); + + CoreRangeSet(const CoreRangeSet &other); + + CoreRangeSet &operator=(const CoreRangeSet &other); + + CoreRangeSet(CoreRangeSet &&other); + + CoreRangeSet &operator=(CoreRangeSet &&other); + + CoreRangeSet(std::vector &&core_ranges); + + size_t size() const; + + template + CoreRangeSet merge(const T &other) const; + + bool core_coord_in_core_ranges(const CoreCoord &core_coord) const; + + bool intersects(const CoreRange &cr) const; + + const std::vector &ranges() const; + + std::string str() const; + + uint32_t num_cores() const; + + CoreRange bounding_box() const; + + private: + void validate_no_overlap(); + + mutable std::mutex ranges_guard; + std::vector ranges_; }; -const inline bool operator==(const CoreRangeSet &a, const CoreRangeSet &b) { - if (a.ranges().size() == b.ranges().size()) { - auto range_a = a.ranges(); - auto range_b = b.ranges(); - for (auto it_a = range_a.begin(), it_b = range_b.begin(); it_a != range_a.end(); it_a++, it_b++) { - if (*it_a != *it_b) { - return false; - } - } - return true; - } - return false; -} +bool operator==(const CoreRangeSet &a, const CoreRangeSet &b); -inline std::vector grid_to_cores( - uint32_t num_cores, uint32_t grid_size_x, uint32_t grid_size_y, bool row_wise = false) { - std::vector cores; - cores.reserve(num_cores); - TT_ASSERT( - num_cores <= grid_size_x * grid_size_y, - "Number of cores {} exceeds grid size {}x{}", - num_cores, - grid_size_x, - grid_size_y); - if (row_wise) { - for (uint32_t i = 0; i < num_cores; ++i) { - cores.push_back({i % grid_size_x, i / grid_size_x}); - } - } else { - for (uint32_t i = 0; i < num_cores; ++i) { - cores.push_back({i / grid_size_y, i % grid_size_y}); - } - } - return cores; -} +std::vector grid_to_cores( + uint32_t num_cores, uint32_t grid_size_x, uint32_t grid_size_y, bool row_wise = false); -inline std::vector grid_to_cores(CoreCoord start, CoreCoord end, bool row_wise = false) { - std::vector cores; - auto num_cores_x = (end.x + 1) - start.x; - auto num_cores_y = (end.y + 1) - start.y; - uint32_t num_cores = num_cores_x * num_cores_y; - cores.reserve(num_cores); - if (row_wise) { - for (uint32_t j = start.y; j < (end.y + 1); j++) { - for (uint32_t i = start.x; i < (end.x + 1); i++) { - cores.push_back({i, j}); - } - } - - } else { - for (uint32_t i = start.x; i < (end.x + 1); i++) { - for (uint32_t j = start.y; j < (end.y + 1); j++) { - cores.push_back({i, j}); - } - } - } - return cores; -} +std::vector grid_to_cores(CoreCoord start, CoreCoord end, bool row_wise = false); // Noop cores are appended at the end with no guarantees on ordering -inline std::vector grid_to_cores_with_noop( +std::vector grid_to_cores_with_noop( const uint32_t bbox_x, const uint32_t bbox_y, const uint32_t grid_size_x, const uint32_t grid_size_y, - const bool row_wise = false) { - ZoneScoped; - std::vector cores; - cores.reserve(grid_size_x * grid_size_y); - TT_ASSERT(bbox_x < grid_size_x); - TT_ASSERT(bbox_y < grid_size_y); - const uint32_t box_size_x = bbox_x + 1; - const uint32_t box_size_y = bbox_y + 1; - - if (row_wise) { - for (uint32_t i = 0; i < box_size_x * box_size_y; ++i) { - cores.push_back({i % box_size_x, i / box_size_x}); - } - } else { - for (uint32_t i = 0; i < box_size_x * box_size_y; ++i) { - cores.push_back({i / box_size_y, i % box_size_y}); - } - } - - // Right rectangle noops - for (uint32_t x = box_size_x; x < grid_size_x; ++x) { - for (uint32_t y = 0; y < grid_size_y; ++y) { - cores.push_back({x, y}); - } - } - - // Bottom rectangle noops - for (uint32_t y = box_size_y; y < grid_size_y; ++y) { - for (uint32_t x = 0; x < box_size_x; ++x) { - cores.push_back({x, y}); - } - } - - return cores; -} + const bool row_wise = false); -inline std::vector corerange_to_cores( - const CoreRangeSet &crs, std::optional max_cores = std::nullopt, bool row_wise = false) { - uint32_t num_total_cores = 0; - std::vector all_cores; - uint32_t offset = 0; - - for (auto core_range : crs.ranges()) { - auto start_coord = core_range.start_coord; - auto end_coord = core_range.end_coord; - auto cores = grid_to_cores(start_coord, end_coord, row_wise); - if (max_cores.has_value()) { - if (all_cores.size() + cores.size() > max_cores.value()) { - uint32_t num_cores_to_add = max_cores.value() - all_cores.size(); - all_cores.insert(all_cores.end(), cores.begin(), cores.begin() + num_cores_to_add); - } else { - all_cores.insert(all_cores.end(), cores.begin(), cores.end()); - } - } else { - all_cores.insert(all_cores.end(), cores.begin(), cores.end()); - } - } - - return all_cores; -} +std::vector corerange_to_cores( + const CoreRangeSet &crs, std::optional max_cores = std::nullopt, bool row_wise = false); -const inline bool operator!=(const CoreRangeSet &a, const CoreRangeSet &b) { return !(a == b); } +bool operator!=(const CoreRangeSet &a, const CoreRangeSet &b); template <> struct fmt::formatter { constexpr auto parse(format_parse_context &ctx) -> format_parse_context::iterator { return ctx.end(); } - auto format(const CoreRangeSet &core_range_set, format_context &ctx) const -> format_context::iterator { - std::stringstream ss; - ss << core_range_set.str(); - return fmt::format_to(ctx.out(), "{}", ss.str()); - } + auto format(const CoreRangeSet &core_range_set, format_context &ctx) const -> format_context::iterator; }; // Adding to tt::tt_metal namespace as we transition to moving this out of global namespace eventually. namespace tt::tt_metal { - using ::CoreCoord; - using ::CoreRange; - using ::CoreRangeSet; -} +using ::CoreCoord; +using ::CoreRange; +using ::CoreRangeSet; +} // namespace tt::tt_metal namespace std { + +template <> +struct hash { + std::size_t operator()(const CoreRange &core_range) const; +}; + +template <> +struct hash { + std::size_t operator()(RelativeCoreCoord const &o) const; +}; + template <> struct hash { - std::size_t operator()(const CoreRangeSet &core_range_set) const { - std::size_t seed = 0; - for (const auto &core_range : core_range_set.ranges()) { - seed = std::hash{}(core_range) + 0x9e3779b9 + (seed << 6) + (seed >> 2); - } - return seed; - } + std::size_t operator()(const CoreRangeSet &core_range_set) const; }; + } // namespace std namespace tt::stl::json { template <> struct to_json_t { - nlohmann::json operator()(const CoreCoord &core_coord) noexcept { - return {{"x", to_json(core_coord.x)}, {"y", to_json(core_coord.y)}}; - } + nlohmann::json operator()(const CoreCoord &core_coord) noexcept; }; template <> struct from_json_t { - CoreCoord operator()(const nlohmann::json &json) noexcept { - return {from_json(json.at("x")), from_json(json.at("y"))}; - } + CoreCoord operator()(const nlohmann::json &json) noexcept; }; template <> struct to_json_t { - nlohmann::json operator()(const RelativeCoreCoord &relative_core_coord) noexcept { - return {{"x", to_json(relative_core_coord.x)}, {"y", to_json(relative_core_coord.y)}}; - } + nlohmann::json operator()(const RelativeCoreCoord &relative_core_coord) noexcept; }; template <> struct from_json_t { - RelativeCoreCoord operator()(const nlohmann::json &json) noexcept { - return {from_json(json.at("x")), from_json(json.at("y"))}; - } + RelativeCoreCoord operator()(const nlohmann::json &json) noexcept; }; template <> struct to_json_t { - nlohmann::json operator()(const CoreRange &core_range) noexcept { - return {{"start", to_json(core_range.start_coord)}, {"end", to_json(core_range.end_coord)}}; - } + nlohmann::json operator()(const CoreRange &core_range) noexcept; }; template <> struct from_json_t { - CoreRange operator()(const nlohmann::json &json) noexcept { - return {from_json(json.at("start")), from_json(json.at("end"))}; - } + CoreRange operator()(const nlohmann::json &json) noexcept; }; template <> struct to_json_t { - nlohmann::json operator()(const CoreRangeSet &core_range_set) noexcept { - nlohmann::json core_range_set_json = nlohmann::json::array(); - return to_json(core_range_set.ranges()); - } + nlohmann::json operator()(const CoreRangeSet &core_range_set) noexcept; }; template <> struct from_json_t { - CoreRangeSet operator()(const nlohmann::json &json) noexcept { - return CoreRangeSet(from_json>(json)); - } + CoreRangeSet operator()(const nlohmann::json &json) noexcept; }; } // namespace tt::stl::json diff --git a/tt_metal/common/work_split.hpp b/tt_metal/common/work_split.hpp index c4c3153cc59..c6c6933a5ed 100644 --- a/tt_metal/common/work_split.hpp +++ b/tt_metal/common/work_split.hpp @@ -174,7 +174,7 @@ inline std::tuple(all_cores.ranges().begin(), all_cores.ranges().end()); // Uneven division of units across cores // This case should only be hit when there are more units of work than a full grid of cores // which is implicitly assumed in the following logic diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp index 01aae5f3130..426e0b6fcab 100644 --- a/tt_metal/impl/device/device.cpp +++ b/tt_metal/impl/device/device.cpp @@ -3218,7 +3218,7 @@ float Device::sfpu_inf() const{ return std::numeric_limits::infinity(); } -pair Device::build_processor_type_to_index(uint32_t programmable_core, uint32_t processor_class) const { +std::pair Device::build_processor_type_to_index(uint32_t programmable_core, uint32_t processor_class) const { TT_ASSERT(programmable_core < this->build_state_indices_.size(), "Programmable core type {} is not included in the FW or Kernel build state", programmable_core); TT_ASSERT(processor_class < this->build_state_indices_[programmable_core].size(), @@ -3236,7 +3236,7 @@ const JitBuildState& Device::build_kernel_state(uint32_t programmable_core, uint } const JitBuildStateSubset Device::build_kernel_states(uint32_t programmable_core, uint32_t processor_class) const { - pair bptti = build_processor_type_to_index(programmable_core, processor_class); + std::pair bptti = build_processor_type_to_index(programmable_core, processor_class); JitBuildStateSubset subset = { &this->kernel_build_states_[bptti.first], bptti.second diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp index 01e1f104596..5ee266d37b7 100644 --- a/tt_metal/impl/device/device.hpp +++ b/tt_metal/impl/device/device.hpp @@ -6,6 +6,7 @@ #include #include +#include #include "hostdevcommon/common_values.hpp" #include "impl/dispatch/work_executor.hpp" @@ -334,7 +335,7 @@ class Device { T get_base_allocator_addr(const HalMemType &mem_type) const; template - std::vector> extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type); + std::vector> extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type); bool dispatch_s_enabled() const; bool distributed_dispatcher() const; @@ -374,9 +375,9 @@ inline T Device::get_base_allocator_addr(const HalMemType &mem_type) const { // TODO: Find a better home for this function template -std::vector> Device::extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type) { +std::vector> Device::extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type) { // This API extracts all the pairs of noc multicast encodings given a set of core ranges - std::vector> dst_noc_multicast_info; + std::vector> dst_noc_multicast_info; dst_noc_multicast_info.reserve(ranges.size()); for (const CoreRange& core_range : ranges) { CoreCoord physical_start = this->physical_core_from_logical_core(core_range.start_coord, core_type); diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp index c54df2dddbe..e52670220ff 100644 --- a/tt_metal/impl/dispatch/command_queue.cpp +++ b/tt_metal/impl/dispatch/command_queue.cpp @@ -663,7 +663,7 @@ void EnqueueProgramCommand::assemble_runtime_args_commands(ProgramCommandSequenc .noc_xy_addr = this->device->get_noc_unicast_encoding(this->noc_index, physical_core)}); } } else { - vector> dst_noc_multicast_info = + vector> dst_noc_multicast_info = device->extract_dst_noc_multicast_info>( kernel->logical_coreranges(), core_type); common_sub_cmds.emplace>( diff --git a/tt_metal/impl/dispatch/data_collection.cpp b/tt_metal/impl/dispatch/data_collection.cpp index 516f7a27912..87c055f690c 100644 --- a/tt_metal/impl/dispatch/data_collection.cpp +++ b/tt_metal/impl/dispatch/data_collection.cpp @@ -123,7 +123,7 @@ class DataCollector { private: map> program_id_to_dispatch_data; - map>>> program_id_to_kernel_groups; + map>>> program_id_to_kernel_groups; map program_id_to_call_count; }; @@ -202,7 +202,7 @@ void DataCollector::DumpData() { // Dump kernel ids for each kernel group in this program for (auto &core_type_and_kernel_groups : program_id_to_kernel_groups[program_id]) { CoreType core_type = core_type_and_kernel_groups.first; - vector> &kernel_groups = core_type_and_kernel_groups.second; + vector> &kernel_groups = core_type_and_kernel_groups.second; outfile << fmt::format("\t{} Kernel Groups: {}\n", core_type, kernel_groups.size()); for (auto &ids_and_ranges : kernel_groups) { // Dump kernel ids in this group diff --git a/tt_metal/impl/dispatch/debug_tools.cpp b/tt_metal/impl/dispatch/debug_tools.cpp index 14ec9b706d3..f8c54fa3573 100644 --- a/tt_metal/impl/dispatch/debug_tools.cpp +++ b/tt_metal/impl/dispatch/debug_tools.cpp @@ -23,7 +23,7 @@ void match_device_program_data_with_host_program_data(const char* host_file, con host_dispatch_dump_file.open(host_file); device_dispatch_dump_file.open(device_file); - vector>> host_map; + vector>> host_map; string line; diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp index d67d0e26ce5..24fd34c42fb 100644 --- a/tt_metal/impl/program/program.cpp +++ b/tt_metal/impl/program/program.cpp @@ -107,8 +107,11 @@ void DisablePersistentKernelCache() { enable_persistent_kernel_cache = false; } std::atomic Program::program_counter = 0; Program::Program() : - id(program_counter++), runtime_id(0), worker_crs_({}), local_circular_buffer_allocation_needed_(false), finalized_(false) { - + id(program_counter++), + runtime_id(0), + worker_crs_(), + local_circular_buffer_allocation_needed_(false), + finalized_(false) { uint32_t programmable_core_count = hal.get_programmable_core_type_count(); for (uint32_t i = 0; i < programmable_core_count; i++) { kernels_.push_back({}); @@ -146,7 +149,7 @@ std::shared_ptr Program::get_kernel(KernelHandle kernel_id) const { return nullptr; } -KernelGroup::KernelGroup() : core_ranges({}) {} +KernelGroup::KernelGroup() : core_ranges(CoreRangeSet()) {} KernelGroup::KernelGroup( const Program &program, @@ -155,8 +158,7 @@ KernelGroup::KernelGroup( bool erisc_is_idle, int last_cb_index, const CoreRangeSet &new_ranges) : - core_ranges({}) { - + core_ranges(CoreRangeSet()) { this->programmable_core_type_index = programmable_core_type_index; this->core_ranges = this->core_ranges.merge(new_ranges); this->kernel_ids = kernel_ids; @@ -664,9 +666,9 @@ void Program::populate_dispatch_data(Device *device) { }; auto extract_dst_noc_unicast_info = - [&device](const std::set &ranges, const CoreType core_type) -> std::vector> { + [&device](const auto &ranges, const CoreType core_type) -> std::vector> { // This API extracts all the pairs of noc multicast encodings given a set of core ranges - vector> dst_noc_unicast_info; + vector> dst_noc_unicast_info; for (const CoreRange &core_range : ranges) { for (auto x = core_range.start_coord.x; x <= core_range.end_coord.x; x++) { for (auto y = core_range.start_coord.y; y <= core_range.end_coord.y; y++) { @@ -686,8 +688,8 @@ void Program::populate_dispatch_data(Device *device) { // TODO: use semaphore.core_type from main if (semaphore.core_type() == CoreType::WORKER) { uint32_t index = hal.get_programmable_core_type_index(HalProgrammableCoreType::TENSIX); - vector> dst_noc_multicast_info = - device->extract_dst_noc_multicast_info>( + vector> dst_noc_multicast_info = + device->extract_dst_noc_multicast_info>( semaphore.core_range_set().ranges(), CoreType::WORKER); transfer_info transfer_info = { .dst_base_addr = semaphore.offset(), @@ -698,7 +700,7 @@ void Program::populate_dispatch_data(Device *device) { } else if (semaphore.core_type() == CoreType::ETH) { // TODO: we only fast dispatch to active eth... uint32_t index = hal.get_programmable_core_type_index(HalProgrammableCoreType::ACTIVE_ETH); - vector> dst_noc_unicast_info = + vector> dst_noc_unicast_info = extract_dst_noc_unicast_info(semaphore.core_range_set().ranges(), CoreType::ETH); transfer_info transfer_info = { .dst_base_addr = semaphore.offset(), @@ -796,8 +798,8 @@ void Program::populate_dispatch_data(Device *device) { for (KernelGroup &kernel_group : this->get_kernel_groups(index)) { // TODO: add a bit in the hal that says if this core type is unicast/multicast if (core_type == CoreType::WORKER) { - std::vector> dst_noc_multicast_info = - device->extract_dst_noc_multicast_info>( + std::vector> dst_noc_multicast_info = + device->extract_dst_noc_multicast_info>( kernel_group.core_ranges.ranges(), core_type); vector kernel_ids; @@ -815,7 +817,7 @@ void Program::populate_dispatch_data(Device *device) { } } else { TT_ASSERT(core_type == CoreType::ETH); - vector> dst_noc_unicast_info = + vector> dst_noc_unicast_info = extract_dst_noc_unicast_info(kernel_group.core_ranges.ranges(), core_type); vector kernel_ids; diff --git a/tt_metal/llrt/tlb_config.cpp b/tt_metal/llrt/tlb_config.cpp index c912d9b2a10..b3deadbc88b 100644 --- a/tt_metal/llrt/tlb_config.cpp +++ b/tt_metal/llrt/tlb_config.cpp @@ -7,6 +7,7 @@ #include "third_party/umd/device/blackhole/blackhole_implementation.h" #include "third_party/umd/device/grayskull/grayskull_implementation.h" #include "third_party/umd/device/wormhole/wormhole_implementation.h" +#include "tt_metal/common/assert.hpp" namespace ll_api { diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp index f3276b74a24..e057ae5da3f 100644 --- a/tt_metal/tt_metal.cpp +++ b/tt_metal/tt_metal.cpp @@ -40,10 +40,10 @@ CoreRangeSet GetCoreRangeSet(const std::variant; if constexpr (std::is_same_v) { - return CoreRangeSet({CoreRange(core_spec, core_spec)}); + return CoreRangeSet(CoreRange(core_spec, core_spec)); } else if constexpr (std::is_same_v) { - return CoreRangeSet({core_spec}); + return CoreRangeSet(core_spec); } else if constexpr (std::is_same_v) { return core_spec; @@ -1060,9 +1060,9 @@ uint32_t CreateSemaphore( return std::visit( [&](auto &&c) -> uint32_t { using T = std::decay_t; - CoreRangeSet crs({}); + CoreRangeSet crs; if constexpr (std::is_same_v) { - crs = CoreRangeSet({c}); + crs = CoreRangeSet(c); } else { crs = c; } diff --git a/ttnn/cpp/ttnn/operations/ccl/reduce_scatter/device/host/reduce_scatter_full_worker_grid.cpp b/ttnn/cpp/ttnn/operations/ccl/reduce_scatter/device/host/reduce_scatter_full_worker_grid.cpp index 7c8cbda4539..06cd1494dd2 100644 --- a/ttnn/cpp/ttnn/operations/ccl/reduce_scatter/device/host/reduce_scatter_full_worker_grid.cpp +++ b/ttnn/cpp/ttnn/operations/ccl/reduce_scatter/device/host/reduce_scatter_full_worker_grid.cpp @@ -336,8 +336,10 @@ static std::pair> select_worker_cores_ TT_ASSERT(num_edm_channels % 2 == 0, "For line topologies, we expect a multiple of 2 number of channels for the algorithm and worker kernels to work."); const std::size_t workers_per_direction = num_edm_channels / num_directions_per_line; - auto const& lower_half_of_cores = CoreRangeSet({CoreRange(CoreCoord(0, 0), CoreCoord(workers_per_direction - 1, num_links - 1))}); - auto const& upper_half_of_cores = CoreRangeSet({CoreRange(CoreCoord(workers_per_direction, 0), CoreCoord(num_edm_channels - 1, num_links - 1))}); + auto const& lower_half_of_cores = + CoreRangeSet(CoreRange(CoreCoord(0, 0), CoreCoord(workers_per_direction - 1, num_links - 1))); + auto const& upper_half_of_cores = CoreRangeSet( + CoreRange(CoreCoord(workers_per_direction, 0), CoreCoord(num_edm_channels - 1, num_links - 1))); if (topology_config.ring_index == 0) { log_trace(tt::LogOp, "Start of line, putting CCL send cores in lower half"); return {upper_half_of_cores, lower_half_of_cores}; @@ -348,7 +350,9 @@ static std::pair> select_worker_cores_ return {lower_half_of_cores, upper_half_of_cores}; } else { log_trace(tt::LogOp, "Middle of line - no CCL kernel"); - return {CoreRangeSet({CoreRange(CoreCoord(0, 0), CoreCoord(num_edm_channels - 1, num_links - 1))}), std::nullopt}; + return { + CoreRangeSet(CoreRange(CoreCoord(0, 0), CoreCoord(num_edm_channels - 1, num_links - 1))), + std::nullopt}; } } @@ -376,9 +380,11 @@ static std::pair> select_worker_cores( } case ttnn::ccl::Topology::Ring: - return {CoreRangeSet({CoreRange(CoreCoord(0, 0), CoreCoord(num_edm_channels - 1, num_links - 1))}), std::nullopt}; + return { + CoreRangeSet(CoreRange(CoreCoord(0, 0), CoreCoord(num_edm_channels - 1, num_links - 1))), + std::nullopt}; - default: TT_ASSERT(false, "Unsupported topology"); return {CoreRangeSet({}), std::nullopt}; + default: TT_ASSERT(false, "Unsupported topology"); return {CoreRangeSet(), std::nullopt}; }; } diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp index 2c461b2f580..b32bea449e2 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp @@ -94,7 +94,7 @@ ParallelConfig determine_parallel_config( auto grid_size = device->compute_with_storage_grid_size(); uint32_t max_num_cores = grid_size.x * grid_size.y; uint32_t num_cores_nhw = 0; - CoreRangeSet grid = {{}}; + CoreRangeSet grid; if (shard_layout == TensorMemoryLayout::HEIGHT_SHARDED) { num_cores_nhw = find_closest_largest_divisor(out_nhw_ntiles, max_num_cores); if (num_cores_nhw < grid_size.x && out_nhw_ntiles > grid_size.x) { diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp index 692e015c546..6d68ddecc5c 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp @@ -24,7 +24,7 @@ namespace optimized_conv_op_utils { using namespace tt; using namespace tt::tt_metal; -pair, vector> compute_opt_conv_activation_as_mm_shape(const tt::tt_metal::LegacyShape& conv_activation_shape, ttnn::operations::sliding_window::SlidingWindowConfig sliding_window_config, uint32_t act_block_h_ntiles) { +std::pair, vector> compute_opt_conv_activation_as_mm_shape(const tt::tt_metal::LegacyShape& conv_activation_shape, ttnn::operations::sliding_window::SlidingWindowConfig sliding_window_config, uint32_t act_block_h_ntiles) { uint32_t filter_h = (uint32_t)sliding_window_config.window_hw.first; // filter_h uint32_t filter_w = (uint32_t)sliding_window_config.window_hw.second; // filter_W @@ -190,7 +190,8 @@ std::vector OptimizedConvNew::create_output_tensors(const std::vectorparallelization_config.grid_size.str()); uint32_t num_cores_x = this->parallelization_config.grid_size.x; uint32_t num_cores_y = this->parallelization_config.grid_size.y; - CoreRangeSet shard_grid = CoreRangeSet({{{0, 0}, {num_cores_x - 1, num_cores_y - 1}}}); + CoreRangeSet shard_grid = + CoreRangeSet(CoreRange({0, 0}, {num_cores_x - 1, num_cores_y - 1})); log_debug(tt::LogOp, "Calculated shard_grid: {}", shard_grid.str()); std::array shard_shape = {this->parallelization_config.per_core_out_matrix_height_ntiles * TILE_HEIGHT, this->parallelization_config.per_core_out_matrix_width_ntiles * TILE_WIDTH}; auto shard_spec = ShardSpec{shard_grid, shard_shape, this->memory_config.shard_spec.value().orientation}; diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp index cf938e1da13..a22885832a8 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp @@ -172,6 +172,6 @@ using namespace tt; using namespace tt::tt_metal; -pair, vector> compute_opt_conv_activation_as_mm_shape(const tt::tt_metal::LegacyShape& conv_activation_shape, ttnn::operations::sliding_window::SlidingWindowConfig sliding_window_config, uint32_t act_block_h_ntiles); +std::pair, vector> compute_opt_conv_activation_as_mm_shape(const tt::tt_metal::LegacyShape& conv_activation_shape, ttnn::operations::sliding_window::SlidingWindowConfig sliding_window_config, uint32_t act_block_h_ntiles); } // optimized_conv_op_utils diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp index a8a7b9e4714..d60aaf02115 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp @@ -862,7 +862,7 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_impl( auto bottom_right_core_physical = device->worker_core_from_logical_core(bottom_right_core); CoreRange mcast_sender_cores(top_left_core, top_left_core); // If single core, this kernel doesn't do mcasting - CoreRangeSet mcast_receiver_cores{{}}; + CoreRangeSet mcast_receiver_cores; uint32_t weights_mcast_sender_semaphore_id{}; uint32_t weights_mcast_receiver_semaphore_id{}; uint32_t act_mcast_sender_semaphore_id = 0; @@ -872,10 +872,10 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_impl( // 2D mcast if (transpose_mcast) { mcast_sender_cores = CoreRange(top_left_core, CoreCoord(0, num_cores_y - 1)); - mcast_receiver_cores = {{CoreRange(CoreCoord(1, 0), bottom_right_core)}}; + mcast_receiver_cores = CoreRange(CoreCoord(1, 0), bottom_right_core); } else { mcast_sender_cores = CoreRange(top_left_core, CoreCoord(num_cores_x - 1, 0)); - mcast_receiver_cores = {{CoreRange(CoreCoord(0, 1), bottom_right_core)}}; + mcast_receiver_cores = CoreRange(CoreCoord(0, 1), bottom_right_core); } weights_mcast_sender_semaphore_id = tt_metal::CreateSemaphore(program, all_cores, INVALID); weights_mcast_receiver_semaphore_id = tt_metal::CreateSemaphore(program, all_cores, INVALID); diff --git a/ttnn/cpp/ttnn/operations/data_movement/bcast/device/bcast_device_operation.cpp b/ttnn/cpp/ttnn/operations/data_movement/bcast/device/bcast_device_operation.cpp index 2f890e9bb71..3c38f88ed64 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/bcast/device/bcast_device_operation.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/bcast/device/bcast_device_operation.cpp @@ -124,7 +124,7 @@ std::vector EltwiseBinaryBroadcast::create_output_tensors(const std::vec } const auto& input_tensor = input_tensors.at(0); if (this->output_mem_config.is_sharded()) { - ShardSpec shard_spec{CoreRangeSet({}), {0, 0}}; + ShardSpec shard_spec{CoreRangeSet(), {0, 0}}; if (input_tensor.memory_config().is_sharded()) { // Derive output shard_spec based on input shard_spec = input_tensor.shard_spec().value(); diff --git a/ttnn/cpp/ttnn/operations/data_movement/bcast/device/multi_core_hw/bcast_op_multi_core_hw.cpp b/ttnn/cpp/ttnn/operations/data_movement/bcast/device/multi_core_hw/bcast_op_multi_core_hw.cpp index aac9e4d8e4c..7b684507703 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/bcast/device/multi_core_hw/bcast_op_multi_core_hw.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/bcast/device/multi_core_hw/bcast_op_multi_core_hw.cpp @@ -81,7 +81,7 @@ operation::ProgramWithCallbacks bcast_multi_core_hw(const Tensor &a, const Tenso num_tiles_per_core_group_2 = 0; all_cores = shard_spec.value().grid; core_group_1 = all_cores; - core_group_2 = CoreRangeSet({}); + core_group_2 = CoreRangeSet(); } uint32_t num_input_tiles_cb0 = src0_sharded ? num_tiles_per_shard : num_input_tiles; @@ -270,7 +270,7 @@ operation::ProgramWithCallbacks bcast_multi_core_hw(const Tensor &a, const Tenso num_tiles_per_core_group_2 = 0; all_cores = shard_spec.value().grid; core_group_1 = all_cores; - core_group_2 = CoreRangeSet({}); + core_group_2 = CoreRangeSet(); } auto& cached_reader_args = GetRuntimeArgs(program, binary_reader_kernel_id); diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded.cpp index 8d2a175fd6e..346e55d3820 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded.cpp @@ -41,7 +41,7 @@ ttnn::Tensor InterleavedToShardedOperation::invoke( bool row_wise = shard_orientation == ShardOrientation::ROW_MAJOR; CoreCoord grid_size; - CoreRangeSet grid_set({}); + CoreRangeSet grid_set; std::visit( [&](const auto &grid) { using GridType = std::decay_t; diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded_partial/interleaved_to_sharded_partial/interleaved_to_sharded_partial.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded_partial/interleaved_to_sharded_partial/interleaved_to_sharded_partial.cpp index 5ad85a81e03..297f7872d48 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/sharded_partial/interleaved_to_sharded_partial/interleaved_to_sharded_partial.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/sharded_partial/interleaved_to_sharded_partial/interleaved_to_sharded_partial.cpp @@ -29,7 +29,7 @@ ttnn::Tensor InterleavedToShardedPartialOperation::invoke( bool row_wise = shard_orientation == ShardOrientation::ROW_MAJOR; CoreCoord grid_size; - CoreRangeSet grid_set({}); + CoreRangeSet grid_set; std::visit( [&](const auto &grid) { using GridType = std::decay_t; diff --git a/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_program_factory.cpp b/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_program_factory.cpp index 1570010fae0..2d9c90c2eb8 100644 --- a/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_program_factory.cpp @@ -328,7 +328,7 @@ operation::ProgramWithCallbacks untilize_multi_core( uint32_t num_cores = all_cores.num_cores(); ncores = num_cores; core_range = all_cores; - core_range_cliff = CoreRangeSet({}); + core_range_cliff = CoreRangeSet(); ntiles_per_block = shard_spec.shape[1] / TILE_WIDTH; nblocks_per_core = shard_spec.shape[0] / TILE_HEIGHT; nblocks_per_core_cliff = 0; diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp index 5ff657eaf8c..e42f7e72d70 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp @@ -204,7 +204,7 @@ BinaryDeviceOperation::tensor_return_value_t BinaryDeviceOperation::create_outpu auto program_factory = select_program_factory(operation_attributes, tensor_args); if (std::holds_alternative(program_factory)) { if (operation_attributes.memory_config.is_sharded()) { - ShardSpec shard_spec{CoreRangeSet({}), {0, 0}}; + ShardSpec shard_spec{CoreRangeSet(), {0, 0}}; if (input_tensor_a.memory_config().is_sharded()) { shard_spec = input_tensor_a.shard_spec().value(); } else if (input_tensor_b.memory_config().is_sharded()) { @@ -219,7 +219,7 @@ BinaryDeviceOperation::tensor_return_value_t BinaryDeviceOperation::create_outpu } } else { if (operation_attributes.memory_config.is_sharded()) { - ShardSpec shard_spec{CoreRangeSet({}), {0, 0}}; + ShardSpec shard_spec{CoreRangeSet(), {0, 0}}; if (input_tensor_a.memory_config().is_sharded()) { // Derive output shard_spec based on input shard_spec = input_tensor_a.shard_spec().value(); diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp index b94004b4dd7..a964c226bd6 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp @@ -111,7 +111,7 @@ BinaryDeviceOperation::BroadcastHeightAndWidthMultiCore::create( num_tiles_per_core_group_2 = 0; all_cores = shard_spec.value().grid; core_group_1 = all_cores; - core_group_2 = CoreRangeSet({}); + core_group_2 = CoreRangeSet(); } uint32_t num_input_tiles_cb0 = src0_sharded ? num_tiles_per_shard : num_input_tiles; @@ -319,7 +319,7 @@ void BinaryDeviceOperation::BroadcastHeightAndWidthMultiCore::override_runtime_a num_tiles_per_core_group_2 = 0; all_cores = shard_spec.value().grid; core_group_1 = all_cores; - core_group_2 = CoreRangeSet({}); + core_group_2 = CoreRangeSet(); } auto& cached_reader_args = GetRuntimeArgs(program, binary_reader_kernel_id); diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp index 40ad1a58097..eb57eb345b9 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp @@ -39,7 +39,7 @@ inline __attribute__((always_inline)) void set_eltwise_binary_runtime_args( auto src_buffer_b = b.buffer(); auto dst_buffer = output.buffer(); - CoreRangeSet all_cores({}), core_group_1({}), core_group_2({}); + CoreRangeSet all_cores, core_group_1, core_group_2; std::optional shard_spec = std::nullopt; std::optional sharded_layout = std::nullopt; @@ -83,7 +83,7 @@ inline __attribute__((always_inline)) void set_eltwise_binary_runtime_args( all_cores = shard_spec.value().grid; num_cores = all_cores.num_cores(); core_group_1 = all_cores; - core_group_2 = CoreRangeSet({}); + core_group_2 = CoreRangeSet(); num_tiles_per_core_group_1 = shard_spec.value().shape[0] * shard_spec.value().shape[1] / TILE_HW; num_tiles_per_core_group_2 = 0; block_size_per_core_group_1 = find_max_block_size(num_tiles_per_core_group_1); diff --git a/ttnn/cpp/ttnn/operations/experimental/paged_cache/device/paged_fill_cache_program_factory.cpp b/ttnn/cpp/ttnn/operations/experimental/paged_cache/device/paged_fill_cache_program_factory.cpp index 7a39357cfb5..bf65a8965b3 100644 --- a/ttnn/cpp/ttnn/operations/experimental/paged_cache/device/paged_fill_cache_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/experimental/paged_cache/device/paged_fill_cache_program_factory.cpp @@ -57,7 +57,7 @@ operation::ProgramWithCallbacks paged_fill_cache_multi_core(const Tensor& cache_ bool row_major; uint32_t num_cores, num_blocks_per_core_group_1, num_blocks_per_core_group_2; - CoreRangeSet all_cores({}), core_group_1({}), core_group_2({}); + CoreRangeSet all_cores, core_group_1, core_group_2; row_major = true; std::tie(num_cores, all_cores, core_group_1, core_group_2, num_blocks_per_core_group_1, num_blocks_per_core_group_2) = tt::tt_metal::split_work_to_cores(compute_with_storage_grid_size, num_blocks_of_work, row_major); diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/nlp_concat_heads/device/nlp_concat_heads_program_factory.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/nlp_concat_heads/device/nlp_concat_heads_program_factory.cpp index 8e63c55b980..4f83def6d87 100644 --- a/ttnn/cpp/ttnn/operations/experimental/transformer/nlp_concat_heads/device/nlp_concat_heads_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/experimental/transformer/nlp_concat_heads/device/nlp_concat_heads_program_factory.cpp @@ -45,7 +45,7 @@ operation::ProgramWithCallbacks multi_core_nlp_concat_heads(const Tensor &a, Ten // Block is a unit of work; ie. num of per_tensor_tiles per core uint32_t num_blocks = ashape[0] * ashape[2] / TILE_HEIGHT; uint32_t num_cores = 0, num_blocks_per_core_group_1 = 0, num_blocks_per_core_group_2 = 0; - CoreRangeSet all_cores = CoreRangeSet({}), core_group_1 = CoreRangeSet({}), core_group_2 = CoreRangeSet({}); + CoreRangeSet all_cores = CoreRangeSet(), core_group_1 = CoreRangeSet(), core_group_2 = CoreRangeSet(); bool row_major = false; if (in_sharded) { all_cores = a.shard_spec().value().grid; diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_device_operation.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_device_operation.cpp index 1fa6578efcc..c174d458fdf 100644 --- a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_device_operation.cpp +++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_device_operation.cpp @@ -72,7 +72,7 @@ std::vector RotaryEmbedding::create_output_tensors(const std::vectorcompute_output_shapes(input_tensors)[0]; if (this->output_mem_config.is_sharded()) { - ShardSpec shard_spec{CoreRangeSet({}), {0, 0}}; + ShardSpec shard_spec{CoreRangeSet(), {0, 0}}; if (input_tensor.is_sharded()) { shard_spec = input_tensor.shard_spec().value(); } else { diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_program_factory.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_program_factory.cpp index faeb4eefe39..f9ed0492f56 100644 --- a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_program_factory.cpp @@ -64,7 +64,7 @@ operation::ProgramWithCallbacks rotary_embedding_multi_core( bool row_major; uint32_t num_cores, num_rows_per_core_group_1, num_rows_per_core_group_2; - CoreRangeSet all_cores({}), core_group_1({}), core_group_2({}); + CoreRangeSet all_cores, core_group_1, core_group_2; bool in_sharded = input.shard_spec().has_value(); bool out_sharded = output.shard_spec().has_value(); @@ -77,7 +77,7 @@ operation::ProgramWithCallbacks rotary_embedding_multi_core( all_cores = shard_spec.value().grid; num_cores = all_cores.num_cores(); core_group_1 = all_cores; - core_group_2 = CoreRangeSet({}); + core_group_2 = CoreRangeSet(); num_rows_per_core_group_1 = shard_spec.value().shape[0] / TILE_HEIGHT; num_rows_per_core_group_2 = 0; num_input_tiles = diff --git a/ttnn/cpp/ttnn/operations/kv_cache/device/update_cache_op_multi_core.cpp b/ttnn/cpp/ttnn/operations/kv_cache/device/update_cache_op_multi_core.cpp index 95aaf14358a..4e7719e53ef 100644 --- a/ttnn/cpp/ttnn/operations/kv_cache/device/update_cache_op_multi_core.cpp +++ b/ttnn/cpp/ttnn/operations/kv_cache/device/update_cache_op_multi_core.cpp @@ -63,7 +63,7 @@ operation::ProgramWithCallbacks update_cache_multi_core(const Tensor& cache_tens bool row_major; uint32_t num_cores, num_batched_heads_per_core_group_1, num_batched_heads_per_core_group_2; - CoreRangeSet all_cores({}), core_group_1({}), core_group_2({}); + CoreRangeSet all_cores, core_group_1, core_group_2; std::optional shard_spec = input_tensor.shard_spec(); @@ -73,7 +73,7 @@ operation::ProgramWithCallbacks update_cache_multi_core(const Tensor& cache_tens all_cores = shard_spec.value().grid; num_cores = all_cores.num_cores(); core_group_1 = all_cores; - core_group_2 = CoreRangeSet({}); + core_group_2 = CoreRangeSet(); num_batched_heads_per_core_group_1 = shard_spec.value().shape[0] / TILE_HEIGHT; num_batched_heads_per_core_group_2 = 0; num_input_tiles = shard_spec.value().shape[0] * shard_spec.value().shape[1] / TILE_HW; @@ -324,7 +324,7 @@ operation::ProgramWithCallbacks fill_cache_multi_core(const Tensor& cache_tensor bool row_major; uint32_t num_cores, num_blocks_per_core_group_1, num_blocks_per_core_group_2; - CoreRangeSet all_cores({}), core_group_1({}), core_group_2({}); + CoreRangeSet all_cores, core_group_1, core_group_2; std::optional shard_spec = input_tensor.shard_spec(); @@ -334,7 +334,7 @@ operation::ProgramWithCallbacks fill_cache_multi_core(const Tensor& cache_tensor all_cores = shard_spec.value().grid; num_cores = all_cores.num_cores(); core_group_1 = all_cores; - core_group_2 = CoreRangeSet({}); + core_group_2 = CoreRangeSet(); num_blocks_per_core_group_1 = shard_spec.value().shape[0] / TILE_HEIGHT; num_blocks_per_core_group_2 = 0; num_input_tiles = shard_spec.value().shape[0] * shard_spec.value().shape[1] / TILE_HW; diff --git a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp index 8414733611b..764ab25f3be 100644 --- a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp +++ b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp @@ -1421,7 +1421,7 @@ std::vector Matmul::create_output_tensors(const std::vector& inp uint32_t num_blocks_x = (N - 1) / per_core_N + 1; uint32_t num_blocks_total = num_blocks_y * num_blocks_x; uint32_t num_cores = num_blocks_x * num_blocks_y; - CoreRangeSet all_cores({}); + CoreRangeSet all_cores; ShardOrientation shard_orientation; if (program_config.transpose_mcast) { all_cores = CoreRangeSet({CoreRange({0, 0}, {num_blocks_y - 1, num_blocks_x - 1})}); diff --git a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_1d_program_factory.cpp b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_1d_program_factory.cpp index 23db6670f04..a292a31be18 100644 --- a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_1d_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_1d_program_factory.cpp @@ -146,10 +146,10 @@ operation::ProgramWithCallbacks create_program_mcast_in0( in0_mcast_receiver_num_cores, num_cores); // should always be number of cores in receiver grid up to number of active cores - CoreRangeSet in0_mcast_cores_with_work_and_in_receiver_grid({}); - CoreRangeSet in0_mcast_cores_without_work_and_in_receiver_grid({}); - CoreRangeSet in0_mcast_cores_without_work_and_not_in_receiver_grid({}); - CoreRangeSet in0_mcast_receivers({}); + CoreRangeSet in0_mcast_cores_with_work_and_in_receiver_grid; + CoreRangeSet in0_mcast_cores_without_work_and_in_receiver_grid; + CoreRangeSet in0_mcast_cores_without_work_and_not_in_receiver_grid; + CoreRangeSet in0_mcast_receivers; std::vector in0_mcast_noc_x; std::vector in0_mcast_noc_y; if (in0_is_sharded) { @@ -987,7 +987,7 @@ operation::ProgramWithCallbacks create_program_mcast_in1( uint32_t in1_mcast_receiver_num_cores = in1_mcast_receiver_cores_bounding_box.size(); // always mcast to full grid CoreRange in1_mcast_sender(start_core, start_core); - CoreRangeSet in1_mcast_receivers({}); + CoreRangeSet in1_mcast_receivers; if (in1_mcast_receiver_num_cores > 1) { auto receiver_start_core = start_core.x != (compute_with_storage_grid_size.x - 1) ? CoreCoord{start_core.x + 1, start_core.y} diff --git a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_dram_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_dram_sharded_program_factory.cpp index 240f9aff9a0..96d914fbff2 100644 --- a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_dram_sharded_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_dram_sharded_program_factory.cpp @@ -459,7 +459,7 @@ operation::ProgramWithCallbacks create_program_dram_sharded( tt_metal::Program program{}; // get the dram readers - CoreRangeSet all_worker_cores = CoreRangeSet{{}}; + CoreRangeSet all_worker_cores; std::vector all_worker_cores_ordered; if (device->arch() == tt::ARCH::WORMHOLE_B0) { @@ -1031,7 +1031,8 @@ operation::ProgramWithCallbacks create_program_dram_sharded( for (uint32_t i = 0; i < all_cores_in_rect_grid_vec.size(); ++i) { auto core = all_cores_in_rect_grid_vec[i]; - if (all_worker_cores.ranges().find(core) == all_worker_cores.ranges().end()) { // not worker + if (std::find(all_worker_cores.ranges().begin(), all_worker_cores.ranges().end(), core) == + all_worker_cores.ranges().end()) { // not worker // in1 reader rt args bool is_worker_core = false; std::vector mm_in1_sender_writer_args; diff --git a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_optimized_program_factory.cpp b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_optimized_program_factory.cpp index 09e649bb368..4a6fd50be09 100644 --- a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_optimized_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_optimized_program_factory.cpp @@ -123,7 +123,7 @@ operation::ProgramWithCallbacks create_program( } uint32_t num_cores = 0, num_blocks_per_core_group_1 = 0, num_blocks_per_core_group_2 = 0; - CoreRangeSet all_cores({}), core_group_1({}), core_group_2({}); + CoreRangeSet all_cores, core_group_1, core_group_2; if (shard_spec.has_value()) { all_cores = shard_spec.value().grid; diff --git a/ttnn/cpp/ttnn/operations/normalization/layernorm/device/multi_core/layernorm_op_multi_core.cpp b/ttnn/cpp/ttnn/operations/normalization/layernorm/device/multi_core/layernorm_op_multi_core.cpp index cf5db0c6d72..392542fdaad 100644 --- a/ttnn/cpp/ttnn/operations/normalization/layernorm/device/multi_core/layernorm_op_multi_core.cpp +++ b/ttnn/cpp/ttnn/operations/normalization/layernorm/device/multi_core/layernorm_op_multi_core.cpp @@ -604,9 +604,9 @@ operation::ProgramWithCallbacks layernorm_multi_core_sharded( CoreCoord start_core = {0, 0}; CoreRangeSet all_cores = shard_spec.grid; CoreRange sender_cores(start_core, start_core); - CoreRangeSet all_to_all_cores({}); - CoreRangeSet all_to_all_workers_except_sender({}); - CoreRangeSet not_all_to_all_workers({}); + CoreRangeSet all_to_all_cores; + CoreRangeSet all_to_all_workers_except_sender; + CoreRangeSet not_all_to_all_workers; uint32_t num_cores_x_mcast, num_cores_y_mcast; if (mcast_1d) { sender_cores = {start_core, start_core}; @@ -707,18 +707,19 @@ operation::ProgramWithCallbacks layernorm_multi_core_sharded( sender_cores = { {(std::size_t) start_core.x, (std::size_t) start_core.y}, {(std::size_t) start_core.x, (std::size_t) start_core.y + num_cores_y - 1}}; - all_to_all_cores = CoreRangeSet({CoreRange( - {(std::size_t) start_core.x, (std::size_t) start_core.y}, - {(std::size_t) start_core.x + num_cores_all_to_all - 1, (std::size_t) start_core.y + num_cores_y - 1})}); + all_to_all_cores = CoreRangeSet(CoreRange( + {(std::size_t)start_core.x, (std::size_t)start_core.y}, + {(std::size_t)start_core.x + num_cores_all_to_all - 1, (std::size_t)start_core.y + num_cores_y - 1})); if (use_mcast && num_cores_all_to_all > 1) { - all_to_all_workers_except_sender = CoreRangeSet({CoreRange( - {(std::size_t) start_core.x + 1, (std::size_t) start_core.y}, - {(std::size_t) start_core.x + num_cores_all_to_all - 1, (std::size_t) start_core.y + num_cores_y - 1})}); + all_to_all_workers_except_sender = CoreRangeSet(CoreRange( + {(std::size_t)start_core.x + 1, (std::size_t)start_core.y}, + {(std::size_t)start_core.x + num_cores_all_to_all - 1, + (std::size_t)start_core.y + num_cores_y - 1})); } if (num_none_all_to_all_workers > 0) { - not_all_to_all_workers = CoreRangeSet({CoreRange( - {(std::size_t) start_core.x + num_cores_all_to_all, (std::size_t) start_core.y}, - {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y + num_cores_y - 1})}); + not_all_to_all_workers = CoreRangeSet(CoreRange( + {(std::size_t)start_core.x + num_cores_all_to_all, (std::size_t)start_core.y}, + {(std::size_t)start_core.x + num_cores_x - 1, (std::size_t)start_core.y + num_cores_y - 1})); } num_cores_x_mcast = num_cores_x; num_cores_y_mcast = 1; @@ -726,18 +727,19 @@ operation::ProgramWithCallbacks layernorm_multi_core_sharded( sender_cores = { {(std::size_t) start_core.x, (std::size_t) start_core.y}, {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y}}; - all_to_all_cores = CoreRangeSet({CoreRange( - {(std::size_t) start_core.x, (std::size_t) start_core.y}, - {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y + num_cores_all_to_all - 1})}); + all_to_all_cores = CoreRangeSet(CoreRange( + {(std::size_t)start_core.x, (std::size_t)start_core.y}, + {(std::size_t)start_core.x + num_cores_x - 1, (std::size_t)start_core.y + num_cores_all_to_all - 1})); if (use_mcast && num_cores_all_to_all > 1) { - all_to_all_workers_except_sender = CoreRangeSet({CoreRange( - {(std::size_t) start_core.x, (std::size_t) start_core.y + 1}, - {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y + num_cores_all_to_all - 1})}); + all_to_all_workers_except_sender = CoreRangeSet(CoreRange( + {(std::size_t)start_core.x, (std::size_t)start_core.y + 1}, + {(std::size_t)start_core.x + num_cores_x - 1, + (std::size_t)start_core.y + num_cores_all_to_all - 1})); } if (num_none_all_to_all_workers > 0) { - not_all_to_all_workers = CoreRangeSet({CoreRange( - {(std::size_t) start_core.x, (std::size_t) start_core.y + num_cores_all_to_all}, - {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y + num_cores_y - 1})}); + not_all_to_all_workers = CoreRangeSet(CoreRange( + {(std::size_t)start_core.x, (std::size_t)start_core.y + num_cores_all_to_all}, + {(std::size_t)start_core.x + num_cores_x - 1, (std::size_t)start_core.y + num_cores_y - 1})); } num_cores_x_mcast = 1; num_cores_y_mcast = num_cores_y; diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool2d_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool2d_multi_core_program_factory.cpp index 8e035fbe341..7c176dbf42f 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool2d_multi_core_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool2d_multi_core_program_factory.cpp @@ -88,7 +88,7 @@ MaxPool2D::MultiCore::cached_program_t max_pool_2d_multi_core_sharded_with_halo_ auto all_cores = input.shard_spec().value().grid; uint32_t ncores = all_cores.num_cores(); auto core_range = all_cores; - auto core_range_cliff = CoreRangeSet({}); + auto core_range_cliff = CoreRangeSet(); uint32_t in_nhw_per_core = input.shard_spec()->shape[0]; uint32_t in_nhw_per_core_cliff = 0; uint32_t out_nhw_per_core = output.shard_spec()->shape[0]; diff --git a/ttnn/cpp/ttnn/operations/reduction/generic/device/multi_core_h/reduce_op_multi_core_h.cpp b/ttnn/cpp/ttnn/operations/reduction/generic/device/multi_core_h/reduce_op_multi_core_h.cpp index 3ad22e3eef1..1a5fd46bce4 100644 --- a/ttnn/cpp/ttnn/operations/reduction/generic/device/multi_core_h/reduce_op_multi_core_h.cpp +++ b/ttnn/cpp/ttnn/operations/reduction/generic/device/multi_core_h/reduce_op_multi_core_h.cpp @@ -57,7 +57,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h( all_cores = a.shard_spec().value().grid; num_cores = all_cores.num_cores(); core_group_1 = all_cores; - core_group_2 = CoreRangeSet({}); + core_group_2 = CoreRangeSet(); num_cols_per_core_group_1 = NC * (a.shard_spec().value().shape[1] / TILE_WIDTH); num_cols_per_core_group_2 = 0; } diff --git a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp index fd58677d454..5a55ebbd0c5 100644 --- a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp +++ b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp @@ -13,7 +13,7 @@ namespace ttnn::operations::sliding_window { struct ParallelConfig { - CoreRangeSet grid = {{}}; + CoreRangeSet grid = {}; TensorMemoryLayout shard_scheme; ShardOrientation shard_orientation; @@ -46,7 +46,7 @@ struct SlidingWindowConfig { // parallel configuration uint32_t num_cores_nhw = 1; // num cores along collapsed height nhw uint32_t num_cores_c = 1; // num cores along width c - CoreRangeSet core_range_set = std::set{CoreRange({0, 0}, {0, 0})}; // active cores + CoreRangeSet core_range_set = CoreRangeSet(CoreRange({0, 0}, {0, 0})); // active cores bool snap_to_tile = false; bool is_bilinear = false;