diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/8_dram_adjacent_core_read/test_dram_read.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/8_dram_adjacent_core_read/test_dram_read.cpp
index facfd0ab019..773380ebee9 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/8_dram_adjacent_core_read/test_dram_read.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/8_dram_adjacent_core_read/test_dram_read.cpp
@@ -632,7 +632,7 @@ int main(int argc, char **argv) {
         uint32_t num_cores = num_banks; // number of DRAM banks
         // uint32_t num_banks_all = 12;
 
-        CoreRangeSet all_cores = CoreRangeSet{{}};
+        CoreRangeSet all_cores;
         std::vector<CoreCoord> all_cores_list;
         if (device->arch() == tt::ARCH::WORMHOLE_B0) {
             get_dram_reader_core_coords_wormhole_b0(device, all_cores, all_cores_list);
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/test_dram_read_l1_write.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/test_dram_read_l1_write.cpp
index a3d62706327..814b28abe02 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/test_dram_read_l1_write.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/test_dram_read_l1_write.cpp
@@ -815,9 +815,9 @@ int main(int argc, char **argv) {
         uint32_t num_tiles = static_cast<uint32_t>((input_size + single_tile_size - 1) / single_tile_size);
         uint32_t num_cores = num_banks; // number of DRAM banks
 
-        CoreRangeSet all_dram_reader_cores = CoreRangeSet{{}};
+        CoreRangeSet all_dram_reader_cores;
         std::vector<CoreCoord> all_dram_reader_cores_ordered;
-         CoreRangeSet all_l1_receiver_cores = CoreRangeSet{{}};
+        CoreRangeSet all_l1_receiver_cores;
         std::vector<CoreCoord> all_l1_writer_cores_ordered;
         if (device->arch() == tt::ARCH::BLACKHOLE) {
             get_dram_reader_core_coords_blackhole(device, all_dram_reader_cores, all_dram_reader_cores_ordered);
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency.cpp
index 1128d5d7809..85b15175215 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency.cpp
@@ -263,7 +263,7 @@ int main(int argc, char **argv) {
             {"MCAST_NOC_END_ADDR_Y", std::to_string(mcast_noc_addr_end_y)}
         };
         if (!page_size_as_runtime_arg_g) {
-            defines.insert(pair<string, string>("PAGE_SIZE", std::to_string(page_size_g)));
+            defines.insert(std::pair<string, string>("PAGE_SIZE", std::to_string(page_size_g)));
         }
 
         tt_metal::CircularBufferConfig cb_config = tt_metal::CircularBufferConfig(page_size_g * page_count_g, {{0, tt::DataFormat::Float32}})
diff --git a/tests/tt_metal/tt_metal/test_core_range_set.cpp b/tests/tt_metal/tt_metal/test_core_range_set.cpp
index 0c6cbb21ff0..d40d2516128 100644
--- a/tests/tt_metal/tt_metal/test_core_range_set.cpp
+++ b/tests/tt_metal/tt_metal/test_core_range_set.cpp
@@ -223,7 +223,7 @@ int main(int argc, char **argv) {
         tt_metal::Program program = tt_metal::CreateProgram();
         CoreRange core_range_one({0, 0}, {1, 1});
         CoreRange core_range_two({2, 2}, {3, 3});
-        CoreRangeSet core_ranges = CoreRangeSet({core_range_one, core_range_two});
+        CoreRangeSet core_ranges = CoreRangeSet(std::vector{core_range_one, core_range_two});
 
         pass &= test_program_specified_with_core_range_set(device, program, core_ranges);
 
diff --git a/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp b/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp
index 601ce80f696..d8b5c73f90c 100644
--- a/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp
+++ b/tests/tt_metal/tt_metal/test_multi_core_kernel.cpp
@@ -251,7 +251,7 @@ bool test_multi_core_kernel_unique_runtime_args(tt_metal::Device *device) {
     CoreRange core_group({0, 1}, {1, 1});
     CoreRange single_core({1, 0}, {1, 0});
     CoreRange all_cores(start_core, end_core);
-    CoreRangeSet core_blocks = CoreRangeSet({start_core_range, single_core, core_group});
+    CoreRangeSet core_blocks = CoreRangeSet(std::vector{start_core_range, single_core, core_group});
 
     uint32_t single_tile_size = 2 * 1024;
     int32_t num_tiles = 2048;
diff --git a/tests/tt_metal/tt_metal/unit_tests/basic/initialize_semaphores.cpp b/tests/tt_metal/tt_metal/unit_tests/basic/initialize_semaphores.cpp
index 0865725e13f..d43254f7c37 100644
--- a/tests/tt_metal/tt_metal/unit_tests/basic/initialize_semaphores.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests/basic/initialize_semaphores.cpp
@@ -132,7 +132,7 @@ TEST_F(DeviceFixture, CreateMultipleSemaphoresOnSameCore) {
 
     CoreRange core_range({1, 0}, {3, 0});
     CoreRangeSet core_range_set({core_range});
-    CoreRangeSet core_range_set2 = core_range_set.merge({core1});
+    CoreRangeSet core_range_set2 = core_range_set.merge(std::set<CoreRange>{core1});
     std::set<CoreRange> set_of_cores({CoreRange({2,0}, {2,0}), CoreRange({3,0}, {3,0}), CoreRange({5,0}, {5,0})});
     CoreRangeSet core_range_set3(set_of_cores);
     CoreRangeSet core_range_set4({CoreRange({5,0}, {6,0})});
diff --git a/tests/tt_metal/tt_metal/unit_tests/basic/runtime_args.cpp b/tests/tt_metal/tt_metal/unit_tests/basic/runtime_args.cpp
index ec54d27d5d6..520d04986d2 100644
--- a/tests/tt_metal/tt_metal/unit_tests/basic/runtime_args.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests/basic/runtime_args.cpp
@@ -173,7 +173,7 @@ TEST_F(DeviceFixture, LegallyModifyRTArgsDataMovement) {
         // First run the program with the initial runtime args
         CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
         CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
-        CoreRangeSet core_range_set({first_core_range, second_core_range});
+        CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
         auto program = unit_tests::runtime_args::initialize_program_data_movement_rta(this->devices_.at(id), core_range_set, 2);
         ASSERT_TRUE(program.num_kernels() == 1);
         std::vector<uint32_t> initial_runtime_args = {101, 202};
@@ -219,7 +219,7 @@ TEST_F(DeviceFixture, LegallyModifyRTArgsCompute) {
         // First run the program with the initial runtime args
         CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
         CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
-        CoreRangeSet core_range_set({first_core_range, second_core_range});
+        CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
         std::vector<uint32_t> initial_runtime_args = {101, 202};
         std::vector<uint32_t> common_runtime_args = {11, 22, 33, 44};
         auto program = unit_tests::runtime_args::initialize_program_compute(this->devices_.at(id), core_range_set, initial_runtime_args.size(), common_runtime_args.size());
@@ -249,7 +249,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsSubsetOfCoresCompute) {
         // First run the program with the initial runtime args
         CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
         CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
-        CoreRangeSet core_range_set({first_core_range, second_core_range});
+        CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
         std::vector<uint32_t> initial_runtime_args = {101, 202};
         std::vector<uint32_t> common_runtime_args = {11, 22, 33, 44};
 
@@ -277,7 +277,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsUniqueValuesCompute) {
         // First run the program with the initial runtime args
         CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
         CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
-        CoreRangeSet core_range_set({first_core_range, second_core_range});
+        CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
         std::vector<uint32_t> common_runtime_args = {11, 22, 33, 44};
         auto program = unit_tests::runtime_args::initialize_program_compute(this->devices_.at(id), core_range_set, 2, common_runtime_args.size());
 
@@ -311,7 +311,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsVaryingLengthPerCore) {
         // First run the program with the initial runtime args
         CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
         CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
-        CoreRangeSet core_range_set({first_core_range, second_core_range});
+        CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
         std::vector<uint32_t> common_runtime_args = {11, 22, 33, 44};
 
         // Figure out max number of unique runtime args across all cores, so kernel
@@ -359,7 +359,7 @@ TEST_F(DeviceFixture, SetRuntimeArgsVaryingLengthPerCore) {
 TEST_F(DeviceFixture, IllegalTooManyRuntimeArgs) {
     for (unsigned int id = 0; id < num_devices_; id++) {
         CoreRange first_core_range(CoreCoord(1, 1), CoreCoord(2, 2));
-        CoreRangeSet core_range_set({first_core_range});
+        CoreRangeSet core_range_set(first_core_range);
         auto program = unit_tests::runtime_args::initialize_program_compute(this->devices_.at(id), core_range_set, 0, 0); // Kernel isn't run here.
 
         // Set 100 unique args, then try to set 300 common args and fail.
@@ -381,7 +381,7 @@ TEST_F(DeviceFixture, IllegallyModifyRTArgs) {
         // First run the program with the initial runtime args
         CoreRange first_core_range(CoreCoord(0, 0), CoreCoord(1, 1));
         CoreRange second_core_range(CoreCoord(3, 3), CoreCoord(5, 5));
-        CoreRangeSet core_range_set({first_core_range, second_core_range});
+        CoreRangeSet core_range_set(std::vector{first_core_range, second_core_range});
         auto program = unit_tests::runtime_args::initialize_program_data_movement_rta(this->devices_.at(id), core_range_set, 2);
         ASSERT_TRUE(program.num_kernels() == 1);
         std::vector<uint32_t> initial_runtime_args = {101, 202};
diff --git a/tests/tt_metal/tt_metal/unit_tests/compute/test_sfpu_compute.cpp b/tests/tt_metal/tt_metal/unit_tests/compute/test_sfpu_compute.cpp
index 532276b9e39..56a9dbc37e0 100644
--- a/tests/tt_metal/tt_metal/unit_tests/compute/test_sfpu_compute.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests/compute/test_sfpu_compute.cpp
@@ -105,7 +105,7 @@ struct SfpuConfig {
     size_t tile_byte_size = 0;
     tt::DataFormat l1_input_data_format = tt::DataFormat::Invalid;
     tt::DataFormat l1_output_data_format = tt::DataFormat::Invalid;
-    CoreRangeSet cores = {{}};
+    CoreRangeSet cores = CoreRangeSet();
     std::string sfpu_op = "";
     bool approx_mode = true;
 };
@@ -398,7 +398,7 @@ TEST_F(DeviceFixture, DISABLED_AllCoreSingleTileSfpuApproxCompute) {
         .tile_byte_size = 2 * 32 * 32,
         .l1_input_data_format = tt::DataFormat::Float16_b,
         .l1_output_data_format = tt::DataFormat::Float16_b,
-        .cores = {{}},
+        .cores = CoreRangeSet(),
         .approx_mode = true};
 
     auto arch = this->arch_;
@@ -437,7 +437,7 @@ TEST_F(DeviceFixture, DISABLED_AllCoreMultiTileSfpuApproxCompute) {
         .tile_byte_size = 2 * 32 * 32,
         .l1_input_data_format = tt::DataFormat::Float16_b,
         .l1_output_data_format = tt::DataFormat::Float16_b,
-        .cores = {{}},
+        .cores = CoreRangeSet(),
         .approx_mode = true};
 
     auto arch = this->arch_;
diff --git a/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_construct.cpp b/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_construct.cpp
index c1ba749cab4..ffd63eb3842 100644
--- a/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_construct.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_construct.cpp
@@ -10,18 +10,18 @@ namespace basic_tests::CoreRangeSet{
 
 TEST_F(CoreCoordHarness, TestCoreRangeSetValidConstruct)
 {
-    EXPECT_NO_THROW ( ::CoreRangeSet({this->sc1, this->cr2}));
-    EXPECT_NO_THROW ( ::CoreRangeSet({this->cr1, this->cr2}) );
+    EXPECT_NO_THROW(::CoreRangeSet(std::vector{this->sc1, this->cr2}));
+    EXPECT_NO_THROW(::CoreRangeSet(std::vector{this->cr1, this->cr2}));
 
-    ::CoreRangeSet valid_ranges = ::CoreRangeSet({this->cr1, this->cr2});
+    ::CoreRangeSet valid_ranges = ::CoreRangeSet(std::vector{this->cr1, this->cr2});
     EXPECT_EQ(valid_ranges.ranges().size(), 2);
 }
 
 TEST_F(CoreCoordHarness, TestCoreRangeSetInvalidConstruct)
 {
     ::CoreRange overlapping_range({1, 2}, {3, 3});
-    EXPECT_ANY_THROW( ::CoreRangeSet({this->cr1, this->cr2, overlapping_range}) );
-    EXPECT_ANY_THROW( ::CoreRangeSet({this->sc1, this->cr1}) );
+    EXPECT_ANY_THROW(::CoreRangeSet(std::vector{this->cr1, this->cr2, overlapping_range}));
+    EXPECT_ANY_THROW(::CoreRangeSet(std::vector{this->sc1, this->cr1}));
 }
 
 
diff --git a/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_merge.cpp b/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_merge.cpp
index 32c0092741f..36822e744f3 100644
--- a/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_merge.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests/core_coord/test_CoreRangeSet_merge.cpp
@@ -12,18 +12,19 @@ namespace basic_tests::CoreRangeSet{
 
 TEST_F(CoreCoordHarness, TestCoreRangeSetMergeNoSolution)
 {
-    EXPECT_EQ ( ::CoreRangeSet({sc1}).merge({sc3}).ranges() , std::set<::CoreRange>( {sc1,sc3}) );
-    EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr2}).ranges() , std::set<::CoreRange>( {cr1,cr2}) );
-    EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr1,cr2}).ranges() , std::set<::CoreRange>( {cr1,cr2}) );
-    EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr2}).merge({cr3}).ranges() , std::set<::CoreRange>( {cr1,cr2,cr3}) );
+    EXPECT_EQ(::CoreRangeSet(sc1).merge(std::set{sc3}).ranges(), std::set<::CoreRange>({sc1, sc3}));
+    EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{cr2}).ranges(), std::set<::CoreRange>({cr1, cr2}));
+    EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{cr1, cr2}).ranges(), std::set<::CoreRange>({cr1, cr2}));
+    EXPECT_EQ(
+        ::CoreRangeSet(cr1).merge(std::set{cr2}).merge(std::set{cr3}).ranges(), std::set<::CoreRange>({cr1, cr2, cr3}));
 }
 
 TEST_F(CoreCoordHarness, TestCoreRangeSetMergeCoreCoord)
 {
-    ::CoreRangeSet empty_crs({});
-    EXPECT_EQ ( empty_crs.merge({this->sc1}).ranges().size(), 1);
-    EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({sc3, sc4}).ranges() , std::set<::CoreRange>( {cr16}) );
-    EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({sc3}).merge({sc4}).ranges() , std::set<::CoreRange>( {cr16}) );
+    ::CoreRangeSet empty_crs;
+    EXPECT_EQ(empty_crs.merge(std::set{this->sc1}).ranges().size(), 1);
+    EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{sc3, sc4}).ranges(), std::set<::CoreRange>({cr16}));
+    EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{sc3}).merge(std::set{sc4}).ranges(), std::set<::CoreRange>({cr16}));
     CoreRange rect ( {0,0}, {4,2});
     std::set<CoreRange> rect_pts;
     for (unsigned y = rect.start_coord.y; y <= rect.end_coord.y; y++){
@@ -38,18 +39,22 @@ TEST_F(CoreCoordHarness, TestCoreRangeSetMergeCoreCoord)
     EXPECT_EQ ( empty_crs.merge(rect_pts).ranges(), std::set<::CoreRange>( {rect, CoreRange( {2,3}, {3,5} ) } ));
 
     // "H", sub-optimal currently, should be reduced down to 3 CRs instead of 5
-    EXPECT_EQ ( empty_crs.merge( { CoreRange { {0,0}, {1,5} }, CoreRange { {3,0}, {4,5}}, CoreRange { {0,2} , {4,3} }  } ).ranges(),
+    EXPECT_EQ ( empty_crs.merge( std::vector{ CoreRange { {0,0}, {1,5} }, CoreRange { {3,0}, {4,5}}, CoreRange { {0,2} , {4,3} }  } ).ranges(),
                 std::set<::CoreRange>( { CoreRange { {0,0}, {1,1} }, CoreRange { {0,2}, {4,3}}, CoreRange{ {0,4}, {1,5}},
                                        CoreRange { {3,0}, {4,1} }, CoreRange{ {3,4}, {4,5} } } ));
 }
 
 TEST_F(CoreCoordHarness, TestCoreRangeSetMergeCoreRange)
 {
-    EXPECT_EQ ( ::CoreRangeSet({cr1}).merge({cr1}).ranges() , std::set<::CoreRange>( {cr1}) );
-    EXPECT_EQ ( ::CoreRangeSet({cr7}).merge({cr6}).merge({cr4}).ranges() , std::set<::CoreRange>( {cr8} ) );
-    EXPECT_EQ ( ::CoreRangeSet({cr8}).merge({cr7}).merge({cr6}).merge({cr4}).ranges() , std::set<::CoreRange>( {cr8} ) );
-    EXPECT_EQ ( ::CoreRangeSet({cr1, cr2, cr3}).merge({cr4}).ranges() , std::set<::CoreRange>( {cr4}) );
-    EXPECT_EQ ( ::CoreRangeSet({cr1, cr2}).merge({cr4}).merge({cr6}).ranges() , std::set<::CoreRange>( {cr6}) );
+    EXPECT_EQ(::CoreRangeSet(cr1).merge(std::set{cr1}).ranges(), std::set<::CoreRange>({cr1}));
+    EXPECT_EQ(::CoreRangeSet(cr7).merge(std::set{cr6}).merge(std::set{cr4}).ranges(), std::set<::CoreRange>({cr8}));
+    EXPECT_EQ(
+        ::CoreRangeSet(cr8).merge(std::set{cr7}).merge(std::set{cr6}).merge(std::set{cr4}).ranges(),
+        std::set<::CoreRange>({cr8}));
+    EXPECT_EQ(::CoreRangeSet(std::vector{cr1, cr2, cr3}).merge(std::set{cr4}).ranges(), std::set<::CoreRange>({cr4}));
+    EXPECT_EQ(
+        ::CoreRangeSet(std::vector{cr1, cr2}).merge(std::set{cr4}).merge(std::set{cr6}).ranges(),
+        std::set<::CoreRange>({cr6}));
 }
 
 }
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp
index 2223519bc63..3b1a12c88ba 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/command_queue_test_utils.hpp
@@ -12,7 +12,7 @@ struct TestBufferConfig {
     tt::tt_metal::BufferType buftype;
 };
 
-inline pair<tt::tt_metal::Buffer, std::vector<uint32_t>> EnqueueWriteBuffer_prior_to_wrap(tt::tt_metal::Device* device, tt::tt_metal::CommandQueue& cq, const TestBufferConfig& config) {
+inline std::pair<tt::tt_metal::Buffer, std::vector<uint32_t>> EnqueueWriteBuffer_prior_to_wrap(tt::tt_metal::Device* device, tt::tt_metal::CommandQueue& cq, const TestBufferConfig& config) {
     // This function just enqueues a buffer (which should be large in the config)
     // write as a precursor to testing the wrap mechanism
     size_t buf_size = config.num_pages * config.page_size;
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp
index 3194e16e35c..461f07c2825 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/command_queue/test_EnqueueProgram.cpp
@@ -980,7 +980,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllCbConfigsCorrectlySentMultipleCoreR
         CoreCoord worker_grid_size = device->compute_with_storage_grid_size();
         CoreRange cr1({worker_grid_size.x - 2, worker_grid_size.y - 2}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
 
-        CoreRangeSet core_ranges({cr0, cr1});
+        CoreRangeSet core_ranges(std::vector{cr0, cr1});
 
         DummyProgramMultiCBConfig config = {.cr_set = core_ranges, .cb_config_vector = cb_config_vector};
 
@@ -1001,7 +1001,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllCbConfigsCorrectlySentUpdateSizeMul
         CoreCoord worker_grid_size = device->compute_with_storage_grid_size();
         CoreRange cr1({worker_grid_size.x - 2, worker_grid_size.y - 2}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
 
-        CoreRangeSet core_ranges({cr0, cr1});
+        CoreRangeSet core_ranges(std::vector{cr0, cr1});
 
         DummyProgramMultiCBConfig config = {.cr_set = core_ranges, .cb_config_vector = cb_config_vector};
 
@@ -1023,7 +1023,7 @@ TEST_F(CommandQueueSingleCardFixture, TestMultiCbConfigsCorrectlySentUpdateSizeM
         CoreCoord worker_grid_size = device->compute_with_storage_grid_size();
         CoreRange cr1({worker_grid_size.x - 2, worker_grid_size.y - 2}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
 
-        CoreRangeSet core_ranges({cr0, cr1});
+        CoreRangeSet core_ranges(std::vector{cr0, cr1});
 
         DummyProgramMultiCBConfig config = {.cr_set = core_ranges, .cb_config_vector = cb_config_vector};
 
@@ -1036,7 +1036,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllSemConfigsCorrectlySentMultiCore) {
         CoreCoord worker_grid_size = device->compute_with_storage_grid_size();
 
         CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
-        CoreRangeSet cr_set({cr});
+        CoreRangeSet cr_set(cr);
 
         DummyProgramConfig config = {.cr_set = cr_set, .num_sems = NUM_SEMAPHORES};
 
@@ -1052,7 +1052,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllSemaphoreConfigsCorrectlySentMultip
         CoreCoord worker_grid_size = device->compute_with_storage_grid_size();
         CoreRange second_cr({worker_grid_size.x - 2, worker_grid_size.y - 2}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
 
-        CoreRangeSet cr_set({first_cr, second_cr});
+        CoreRangeSet cr_set(std::vector{first_cr, second_cr});
 
         Program program;
         DummyProgramConfig config = {.cr_set = cr_set, .num_sems = NUM_SEMAPHORES};
@@ -1089,7 +1089,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllRuntimeArgsCorrectlySentMultiCore)
         CoreCoord worker_grid_size = device->compute_with_storage_grid_size();
 
         CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
-        CoreRangeSet cr_set({cr});
+        CoreRangeSet cr_set(cr);
 
         DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
         EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args(device, device->command_queue(), dummy_program_config, 13, 17, 19, 1));
@@ -1101,7 +1101,7 @@ TEST_F(CommandQueueSingleCardFixture, TestAllRuntimeArgsCorrectlySentMultiCore_2
         CoreCoord worker_grid_size = device->compute_with_storage_grid_size();
 
         CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
-        CoreRangeSet cr_set({cr});
+        CoreRangeSet cr_set(cr);
 
         DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
         EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args(device, device->command_queue(), dummy_program_config, 255, 255, 255, 1));
@@ -1114,7 +1114,7 @@ TEST_F(CommandQueueSingleCardFixture, TestSendRuntimeArgsMultiCoreRange) {
 
         CoreRange cr0({0, 0}, {worker_grid_size.x - 1, 3});
         CoreRange cr1({0, 4}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
-        CoreRangeSet cr_set({cr0, cr1});
+        CoreRangeSet cr_set(std::vector{cr0, cr1});
 
         DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
         EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args_multi_crs(
@@ -1129,7 +1129,7 @@ TEST_F(CommandQueueSingleCardFixture, TestSendRuntimeArgsMultiNonOverlappingCore
 
         CoreRange cr0({0, 0}, {worker_grid_size.x - 1, 3});
         CoreRange cr1({0, 5}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
-        CoreRangeSet cr_set({cr0, cr1});
+        CoreRangeSet cr_set(std::vector{cr0, cr1});
 
         DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
         EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args_multi_crs(
@@ -1143,7 +1143,7 @@ TEST_F(CommandQueueSingleCardFixture, TestUpdateRuntimeArgsMultiCoreRange) {
 
         CoreRange cr0({0, 0}, {worker_grid_size.x - 1, 3});
         CoreRange cr1({0, 5}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
-        CoreRangeSet cr_set({cr0, cr1});
+        CoreRangeSet cr_set(std::vector{cr0, cr1});
 
         DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
         EXPECT_TRUE(local_test_functions::test_dummy_EnqueueProgram_with_runtime_args_multi_crs(
@@ -1155,7 +1155,7 @@ TEST_F(CommandQueueSingleCardFixture, TestUpdateRuntimeArgsMultiCoreRange) {
 TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute) {
     CoreRange cr0({1, 1}, {2, 2});
     CoreRange cr1({3, 3}, {4, 4});
-    CoreRangeSet cr_set({cr0, cr1});
+    CoreRangeSet cr_set(std::vector{cr0, cr1});
     DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
     for (Device *device : devices_) {
         EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 16, 16, tt::RISCV::COMPUTE));
@@ -1166,7 +1166,7 @@ TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute
 TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute_255_UniqueArgs) {
     CoreRange cr0({1, 1}, {2, 2});
     CoreRange cr1({3, 3}, {4, 4});
-    CoreRangeSet cr_set({cr0, cr1});
+    CoreRangeSet cr_set(std::vector{cr0, cr1});
     DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
     for (Device *device : devices_) {
         EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 255, 0, tt::RISCV::COMPUTE));
@@ -1177,7 +1177,7 @@ TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute
 TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute_255_CommonArgs) {
     CoreRange cr0({1, 1}, {2, 2});
     CoreRange cr1({3, 3}, {4, 4});
-    CoreRangeSet cr_set({cr0, cr1});
+    CoreRangeSet cr_set(std::vector{cr0, cr1});
     DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
     for (Device *device : devices_) {
         EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 0, 255, tt::RISCV::COMPUTE));
@@ -1188,7 +1188,7 @@ TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreCompute
 TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreDataMovementBrisc) {
     CoreRange cr0({1, 1}, {2, 2});
     CoreRange cr1({3, 3}, {4, 4});
-    CoreRangeSet cr_set({cr0, cr1});
+    CoreRangeSet cr_set(std::vector{cr0, cr1});
     DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
     for (Device *device : devices_) {
         EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 16, 16, tt::RISCV::BRISC));
@@ -1199,7 +1199,7 @@ TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreDataMov
 TEST_F(CommandQueueSingleCardFixture, IncrementRuntimeArgsSanityMultiCoreDataMovementNcrisc) {
     CoreRange cr0({1, 1}, {2, 2});
     CoreRange cr1({3, 3}, {4, 4});
-    CoreRangeSet cr_set({cr0, cr1});
+    CoreRangeSet cr_set(std::vector{cr0, cr1});
     DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
     for (Device *device : devices_) {
         EXPECT_TRUE(local_test_functions::test_increment_runtime_args_sanity(device, dummy_program_config, 16, 16, tt::RISCV::NCRISC));
@@ -1219,7 +1219,7 @@ TEST_F(CommandQueueSingleCardFixture, DISABLED_TestFillDispatchCoreBuffer) {
         CoreCoord worker_grid_size = device->compute_with_storage_grid_size();
 
         CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
-        CoreRangeSet cr_set({cr});
+        CoreRangeSet cr_set(cr);
 
         DummyProgramConfig dummy_program_config = {.cr_set = cr_set};
 
@@ -1240,7 +1240,7 @@ TEST_F(CommandQueueFixture, TestRandomizedProgram) {
 
     CoreCoord worker_grid_size = this->device_->compute_with_storage_grid_size();
     CoreRange cr({0, 0}, {worker_grid_size.x - 1, worker_grid_size.y - 1});
-    CoreRangeSet cr_set({cr});
+    CoreRangeSet cr_set(cr);
 
     log_info(tt::LogTest, "Starting compile of {} programs now.", NUM_PROGRAMS);
 
diff --git a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/compute/sfpu/sfpu_compute.cpp b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/compute/sfpu/sfpu_compute.cpp
index 1e6e8c54362..8cb072266de 100644
--- a/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/compute/sfpu/sfpu_compute.cpp
+++ b/tests/tt_metal/tt_metal/unit_tests_fast_dispatch/compute/sfpu/sfpu_compute.cpp
@@ -104,7 +104,7 @@ struct SfpuConfig {
     size_t tile_byte_size = 0;
     tt::DataFormat l1_input_data_format = tt::DataFormat::Invalid;
     tt::DataFormat l1_output_data_format = tt::DataFormat::Invalid;
-    CoreRangeSet cores = {{}};
+    CoreRangeSet cores = CoreRangeSet();
     std::string sfpu_op = "";
     bool approx_mode = true;
 };
diff --git a/tt_metal/common/CMakeLists.txt b/tt_metal/common/CMakeLists.txt
index 75433fda8c8..294d5700810 100644
--- a/tt_metal/common/CMakeLists.txt
+++ b/tt_metal/common/CMakeLists.txt
@@ -1,5 +1,6 @@
 
 set(COMMON_SRCS
+    ${CMAKE_CURRENT_SOURCE_DIR}/core_coord.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/core_descriptor.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/metal_soc_descriptor.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/tt_backend_api_types.cpp
diff --git a/tt_metal/common/core_coord.cpp b/tt_metal/common/core_coord.cpp
new file mode 100644
index 00000000000..3815f2ca7e1
--- /dev/null
+++ b/tt_metal/common/core_coord.cpp
@@ -0,0 +1,525 @@
+// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tt_metal/common/core_coord.h"
+
+#include <algorithm>
+#include <limits>
+#include <mutex>
+#include <optional>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "third_party/umd/device/tt_xy_pair.h"
+#include "tt_metal/common/assert.hpp"
+#include "tt_metal/third_party/tracy/public/tracy/Tracy.hpp"
+#include "tt_metal/tt_stl/reflection.hpp"
+#include "tt_metal/tt_stl/span.hpp"
+
+auto fmt::formatter<CoreCoord>::format(const CoreCoord &core_coord, format_context &ctx) const
+    -> format_context::iterator {
+    std::stringstream ss;
+    ss << core_coord.str();
+    return fmt::format_to(ctx.out(), "{}", ss.str());
+}
+
+std::string RelativeCoreCoord::str() const { return "(x=" + std::to_string(x) + ",y=" + std::to_string(y) + ")"; }
+
+CoreCoord get_core_coord_from_relative(const RelativeCoreCoord &in, const CoreCoord &grid_size) {
+    CoreCoord coord;
+    coord.x = in.x + ((in.x < 0) ? grid_size.x : 0);
+    coord.y = in.y + ((in.y < 0) ? grid_size.y : 0);
+    return coord;
+}
+
+CoreRange::CoreRange(const CoreCoord &point) : start_coord(point), end_coord(point) {}
+
+CoreRange::CoreRange(const CoreCoord &start_coord, const CoreCoord &end_coord) {
+    TT_FATAL(
+        end_coord.x >= start_coord.x and end_coord.y >= start_coord.y,
+        "Invalid core range for start_coord: {}, end_coord: {}",
+        start_coord.str(),
+        end_coord.str());
+
+    this->start_coord = start_coord;
+    this->end_coord = end_coord;
+}
+
+std::optional<CoreRange> CoreRange::intersects(const CoreRange &other) const {
+    std::size_t x1 = std::max(this->start_coord.x, other.start_coord.x);
+    std::size_t y1 = std::max(this->start_coord.y, other.start_coord.y);
+    std::size_t x2 = std::min(this->end_coord.x, other.end_coord.x);
+    std::size_t y2 = std::min(this->end_coord.y, other.end_coord.y);
+    if (x1 <= x2 and y1 <= y2) {
+        return CoreRange({x1, y1}, {x2, y2});
+    }
+
+    return {};
+}
+
+bool CoreRange::adjacent(const CoreRange &other) const {
+    std::size_t x1 = std::max(this->start_coord.x, other.start_coord.x);
+    std::size_t y1 = std::max(this->start_coord.y, other.start_coord.y);
+    std::size_t x2 = std::min(this->end_coord.x, other.end_coord.x);
+    std::size_t y2 = std::min(this->end_coord.y, other.end_coord.y);
+    return ((x2 + 1 == x1 && y1 <= y2) || (y2 + 1 == y1 && x1 <= x2));
+}
+
+bool CoreRange::contains(const CoreRange &other) const {
+    return (other.start_coord.x >= this->start_coord.x) && (other.end_coord.x <= this->end_coord.x) &&
+           (other.start_coord.y >= this->start_coord.y) && (other.end_coord.y <= this->end_coord.y);
+}
+
+bool CoreRange::contains(const CoreCoord &other) const {
+    return (other.x >= this->start_coord.x) && (other.x <= this->end_coord.x) && (other.y >= this->start_coord.y) &&
+           (other.y <= this->end_coord.y);
+}
+
+// Merge lined-up (in x or y dimension) intersecting/adjacent rectangles
+std::optional<CoreRange> CoreRange::merge(const CoreRange &cr) const {
+    if (this->intersects(cr) || this->adjacent(cr)) {
+        if (this->start_coord.x == cr.start_coord.x && this->end_coord.x == cr.end_coord.x)
+            return CoreRange(
+                {this->start_coord.x, std::min(this->start_coord.y, cr.start_coord.y)},
+                {this->end_coord.x, std::max(this->end_coord.y, cr.end_coord.y)});
+
+        else if (this->start_coord.y == cr.start_coord.y && this->end_coord.y == cr.end_coord.y)
+            return CoreRange(
+                {std::min(this->start_coord.x, cr.start_coord.x), this->start_coord.y},
+                {std::max(this->end_coord.x, cr.end_coord.x), this->end_coord.y});
+    }
+    return std::nullopt;
+}
+
+std::string CoreRange::str() const { return "[" + this->start_coord.str() + " - " + this->end_coord.str() + "]"; }
+
+size_t CoreRange::size() const {
+    return (this->end_coord.x - this->start_coord.x + 1) * (this->end_coord.y - this->start_coord.y + 1);
+}
+
+CoreCoord CoreRange::grid_size() const {
+    return {this->end_coord.x - this->start_coord.x + 1, this->end_coord.y - this->start_coord.y + 1};
+}
+
+CoreRange::CoreIterator::CoreIterator(const CoreCoord &current, const CoreRange &core_range) :
+    current_(current), range_(core_range) {}
+
+CoreCoord &CoreRange::CoreIterator::operator*() { return current_; }
+
+CoreRange::CoreIterator &CoreRange::CoreIterator::operator++() {
+    CoreCoord next;
+
+    const bool is_curr_core_at_end_of_row = current_.x == range_.end_coord.x;
+    if (is_curr_core_at_end_of_row) {
+        // Go to the beginning of the next row
+        next.x = range_.start_coord.x;
+        next.y = current_.y + 1;
+    } else {
+        next.x = current_.x + 1;
+        next.y = current_.y;
+    }
+
+    current_ = next;
+    return *this;
+}
+
+CoreRange::CoreIterator CoreRange::begin() const { return CoreRange::CoreIterator(this->start_coord, *this); }
+
+CoreRange::CoreIterator CoreRange::end() const {
+    const CoreCoord iterator_end(this->start_coord.x, this->end_coord.y + 1);
+    return CoreRange::CoreIterator(iterator_end, *this);
+}
+
+bool CoreRange::CoreIterator::operator==(const CoreIterator &other) const { return current_ == other.current_; }
+
+bool CoreRange::CoreIterator::operator!=(const CoreIterator &other) const { return !(current_ == other.current_); }
+
+auto fmt::formatter<CoreRange>::format(const CoreRange &core_range, format_context &ctx) const
+    -> format_context::iterator {
+    std::stringstream ss;
+    ss << core_range.str();
+    return fmt::format_to(ctx.out(), "{}", ss.str());
+}
+
+CoreRangeSet::CoreRangeSet(const std::vector<CoreRange> &core_ranges) :
+    ranges_(core_ranges.begin(), core_ranges.end()) {
+    ZoneScoped;
+    this->validate_no_overlap();
+}
+
+CoreRangeSet::CoreRangeSet(const std::set<CoreRange> &core_ranges) : ranges_(core_ranges.begin(), core_ranges.end()) {
+    ZoneScoped;
+    this->validate_no_overlap();
+}
+
+CoreRangeSet::CoreRangeSet(const CoreRange &core_range) : ranges_{core_range} {}
+
+void swap(CoreRangeSet &first, CoreRangeSet &second) {
+    std::scoped_lock lock(first.ranges_guard, second.ranges_guard);
+    std::swap(first.ranges_, second.ranges_);
+}
+
+CoreRangeSet::CoreRangeSet(const CoreRangeSet &other) {
+    std::scoped_lock lock(other.ranges_guard);
+    this->ranges_ = other.ranges_;
+}
+
+CoreRangeSet &CoreRangeSet::operator=(const CoreRangeSet &other) {
+    std::scoped_lock lock(other.ranges_guard);
+    this->ranges_ = other.ranges_;
+    return *this;
+}
+
+CoreRangeSet::CoreRangeSet(CoreRangeSet &&other) { swap(*this, other); }
+
+CoreRangeSet &CoreRangeSet::operator=(CoreRangeSet &&other) {
+    swap(*this, other);
+    return *this;
+}
+
+CoreRangeSet::CoreRangeSet(std::vector<CoreRange> &&core_ranges) : ranges_(std::move(core_ranges)) {
+    ZoneScoped;
+    this->validate_no_overlap();
+}
+
+size_t CoreRangeSet::size() const { return ranges_.size(); }
+
+template <typename T>
+CoreRangeSet CoreRangeSet::merge(const T &other) const {
+    size_t min_x = std::numeric_limits<size_t>::max(), max_x = 0, min_y = std::numeric_limits<size_t>::max(), max_y = 0;
+    std::set<CoreRange> crs(this->ranges_.begin(), this->ranges_.end());
+    crs.insert(other.begin(), other.end());
+
+    for (const auto &cr : crs) {
+        min_x = std::min(min_x, cr.start_coord.x);
+        max_x = std::max(max_x, cr.end_coord.x);
+        min_y = std::min(min_y, cr.start_coord.y);
+        max_y = std::max(max_y, cr.end_coord.y);
+    }
+
+    // By overallocating by one x entry, we can avoid needing to check for
+    // boundary conditions when iterating, since there'll always be one
+    // last false entry
+    bool grid[max_y + 1][max_x + 2];
+    memset(grid, 0, sizeof(grid));
+
+    for (const auto &cr : crs)
+        for (unsigned y = cr.start_coord.y; y <= cr.end_coord.y; y++)
+            for (unsigned x = cr.start_coord.x; x <= cr.end_coord.x; x++) grid[y][x] = true;
+
+    crs.clear();
+    for (unsigned y = min_y; y <= max_y; y++) {
+        std::set<CoreRange> filter_set, tmp, new_crs;
+        std::vector<CoreRange> ranges;
+        for (unsigned x = min_x; x <= max_x + 1; x++) {
+            if (grid[y][x]) {
+                unsigned x_start = x;
+                while (grid[y][x]) x++;
+                ranges.push_back(CoreRange({x_start, y}, {x - 1, y}));
+            }
+        }
+
+        for (const auto &cr : ranges) {
+            for (const auto &prev_cr : crs) {
+                if (auto merged = cr.merge(prev_cr)) {
+                    new_crs.insert(merged.value());
+                    filter_set.insert(prev_cr);
+                    filter_set.insert(cr);
+                }
+            }
+            crs.insert(cr);
+        }
+        // Set(A) = Set(A) - Set(B)
+        std::set_difference(
+            std::make_move_iterator(crs.begin()),
+            std::make_move_iterator(crs.end()),
+            filter_set.begin(),
+            filter_set.end(),
+            std::inserter(tmp, tmp.end()));
+        crs.swap(tmp);
+        crs.insert(new_crs.begin(), new_crs.end());
+    }
+    return CoreRangeSet(crs);
+}
+
+template CoreRangeSet CoreRangeSet::merge<std::vector<CoreRange>>(const std::vector<CoreRange> &other) const;
+template CoreRangeSet CoreRangeSet::merge<std::set<CoreRange>>(const std::set<CoreRange> &other) const;
+
+template <>
+CoreRangeSet CoreRangeSet::merge<CoreRangeSet>(const CoreRangeSet &other) const {
+    return this->merge(other.ranges());
+}
+
+bool CoreRangeSet::core_coord_in_core_ranges(const CoreCoord &core_coord) const {
+    ZoneScoped;
+    for (const auto &cr : this->ranges_) {
+        if (cr.contains(core_coord))
+            return true;
+    }
+    return false;
+}
+
+bool CoreRangeSet::intersects(const CoreRange &cr) const {
+    for (const auto &local_cr : this->ranges_) {
+        if (local_cr.intersects(cr))
+            return true;
+    }
+    return false;
+}
+
+const std::vector<CoreRange> &CoreRangeSet::ranges() const { return this->ranges_; }
+
+std::string CoreRangeSet::str() const {
+    if (this->ranges().size() > 0) {
+        std::string core_range_set_str = "{";
+        for (const auto &core_range : this->ranges_) {
+            core_range_set_str += core_range.str() + ", ";
+        }
+        core_range_set_str[core_range_set_str.length() - 2] = '}';
+        core_range_set_str.pop_back();
+        return core_range_set_str;
+    } else {
+        return "{}";
+    }
+}
+
+uint32_t CoreRangeSet::num_cores() const {
+    uint32_t num_cores = 0;
+    for (const auto &core_range : this->ranges()) {
+        num_cores += core_range.size();
+    }
+    return num_cores;
+}
+
+CoreRange CoreRangeSet::bounding_box() const {
+    TT_FATAL(this->ranges().size() > 0, "Cannot get bounding_box of an empty CoreRangeSet!");
+    size_t min_x = UINT32_MAX, min_y = UINT32_MAX, max_x = 0, max_y = 0;
+    for (const auto &cr : this->ranges()) {
+        min_x = std::min(min_x, cr.start_coord.x);
+        max_x = std::max(max_x, cr.end_coord.x);
+        min_y = std::min(min_y, cr.start_coord.y);
+        max_y = std::max(max_y, cr.end_coord.y);
+    }
+    return {{min_x, min_y}, {max_x, max_y}};
+}
+
+void CoreRangeSet::validate_no_overlap() {
+    if (this->ranges_.size() < 2) {
+        return;
+    }
+    for (auto outer_it = this->ranges_.begin(); outer_it != this->ranges_.end() - 1; outer_it++) {
+        for (auto inner_it = outer_it + 1; inner_it != this->ranges_.end(); inner_it++) {
+            CoreRange &first_core_range = *outer_it;
+            CoreRange &second_core_range = *inner_it;
+            bool first_core_left_of_second = first_core_range.end_coord.x < second_core_range.start_coord.x;
+            bool first_core_right_of_second = first_core_range.start_coord.x > second_core_range.end_coord.x;
+            bool first_core_above_second = first_core_range.end_coord.y < second_core_range.start_coord.y;
+            bool first_core_below_second = first_core_range.start_coord.y > second_core_range.end_coord.y;
+            auto no_overlap = first_core_left_of_second or first_core_right_of_second or first_core_above_second or
+                              first_core_below_second;
+            if (not no_overlap) {
+                TT_THROW(
+                    "Cannot create CoreRangeSet with specified core ranges because core ranges {} and {} overlap!",
+                    first_core_range.str(),
+                    second_core_range.str());
+            }
+        }
+    }
+}
+
+bool operator==(const CoreRangeSet &a, const CoreRangeSet &b) {
+    if (a.ranges().size() == b.ranges().size()) {
+        auto range_a = a.ranges();
+        auto range_b = b.ranges();
+        for (auto it_a = range_a.begin(), it_b = range_b.begin(); it_a != range_a.end(); it_a++, it_b++) {
+            if (*it_a != *it_b) {
+                return false;
+            }
+        }
+        return true;
+    }
+    return false;
+}
+
+std::vector<CoreCoord> grid_to_cores(uint32_t num_cores, uint32_t grid_size_x, uint32_t grid_size_y, bool row_wise) {
+    std::vector<CoreCoord> cores;
+    cores.reserve(num_cores);
+    TT_ASSERT(
+        num_cores <= grid_size_x * grid_size_y,
+        "Number of cores {} exceeds grid size {}x{}",
+        num_cores,
+        grid_size_x,
+        grid_size_y);
+    if (row_wise) {
+        for (uint32_t i = 0; i < num_cores; ++i) {
+            cores.push_back({i % grid_size_x, i / grid_size_x});
+        }
+    } else {
+        for (uint32_t i = 0; i < num_cores; ++i) {
+            cores.push_back({i / grid_size_y, i % grid_size_y});
+        }
+    }
+    return cores;
+}
+
+std::vector<CoreCoord> grid_to_cores(CoreCoord start, CoreCoord end, bool row_wise) {
+    std::vector<CoreCoord> cores;
+    auto num_cores_x = (end.x + 1) - start.x;
+    auto num_cores_y = (end.y + 1) - start.y;
+    uint32_t num_cores = num_cores_x * num_cores_y;
+    cores.reserve(num_cores);
+    if (row_wise) {
+        for (uint32_t j = start.y; j < (end.y + 1); j++) {
+            for (uint32_t i = start.x; i < (end.x + 1); i++) {
+                cores.push_back({i, j});
+            }
+        }
+
+    } else {
+        for (uint32_t i = start.x; i < (end.x + 1); i++) {
+            for (uint32_t j = start.y; j < (end.y + 1); j++) {
+                cores.push_back({i, j});
+            }
+        }
+    }
+    return cores;
+}
+
+// Noop cores are appended at the end with no guarantees on ordering
+std::vector<CoreCoord> grid_to_cores_with_noop(
+    const uint32_t bbox_x,
+    const uint32_t bbox_y,
+    const uint32_t grid_size_x,
+    const uint32_t grid_size_y,
+    const bool row_wise) {
+    ZoneScoped;
+    std::vector<CoreCoord> cores;
+    cores.reserve(grid_size_x * grid_size_y);
+    TT_ASSERT(bbox_x < grid_size_x);
+    TT_ASSERT(bbox_y < grid_size_y);
+    const uint32_t box_size_x = bbox_x + 1;
+    const uint32_t box_size_y = bbox_y + 1;
+
+    if (row_wise) {
+        for (uint32_t i = 0; i < box_size_x * box_size_y; ++i) {
+            cores.push_back({i % box_size_x, i / box_size_x});
+        }
+    } else {
+        for (uint32_t i = 0; i < box_size_x * box_size_y; ++i) {
+            cores.push_back({i / box_size_y, i % box_size_y});
+        }
+    }
+
+    // Right rectangle noops
+    for (uint32_t x = box_size_x; x < grid_size_x; ++x) {
+        for (uint32_t y = 0; y < grid_size_y; ++y) {
+            cores.push_back({x, y});
+        }
+    }
+
+    // Bottom rectangle noops
+    for (uint32_t y = box_size_y; y < grid_size_y; ++y) {
+        for (uint32_t x = 0; x < box_size_x; ++x) {
+            cores.push_back({x, y});
+        }
+    }
+
+    return cores;
+}
+
+std::vector<CoreCoord> corerange_to_cores(const CoreRangeSet &crs, std::optional<uint32_t> max_cores, bool row_wise) {
+    uint32_t num_total_cores = 0;
+    std::vector<CoreCoord> all_cores;
+    uint32_t offset = 0;
+
+    for (auto core_range : crs.ranges()) {
+        auto start_coord = core_range.start_coord;
+        auto end_coord = core_range.end_coord;
+        auto cores = grid_to_cores(start_coord, end_coord, row_wise);
+        if (max_cores.has_value()) {
+            if (all_cores.size() + cores.size() > max_cores.value()) {
+                uint32_t num_cores_to_add = max_cores.value() - all_cores.size();
+                all_cores.insert(all_cores.end(), cores.begin(), cores.begin() + num_cores_to_add);
+            } else {
+                all_cores.insert(all_cores.end(), cores.begin(), cores.end());
+            }
+        } else {
+            all_cores.insert(all_cores.end(), cores.begin(), cores.end());
+        }
+    }
+
+    return all_cores;
+}
+
+bool operator!=(const CoreRangeSet &a, const CoreRangeSet &b) { return !(a == b); }
+
+auto fmt::formatter<CoreRangeSet>::format(const CoreRangeSet &core_range_set, format_context &ctx) const
+    -> format_context::iterator {
+    std::stringstream ss;
+    ss << core_range_set.str();
+    return fmt::format_to(ctx.out(), "{}", ss.str());
+}
+
+namespace std {
+
+std::size_t hash<RelativeCoreCoord>::operator()(RelativeCoreCoord const &o) const {
+    std::size_t seed = 0;
+    seed = std::hash<std::size_t>()(o.x) ^ std::hash<std::size_t>()(o.y) << 1;
+    return seed;
+}
+
+std::size_t hash<CoreRange>::operator()(const CoreRange &core_range) const {
+    std::size_t seed = 0;
+    seed = std::hash<CoreCoord>{}(core_range.start_coord) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    seed = std::hash<CoreCoord>{}(core_range.end_coord) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    return seed;
+}
+
+std::size_t hash<CoreRangeSet>::operator()(const CoreRangeSet &core_range_set) const {
+    std::size_t seed = 0;
+    for (const auto &core_range : core_range_set.ranges()) {
+        seed = std::hash<CoreRange>{}(core_range) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    }
+    return seed;
+}
+
+}  // namespace std
+
+namespace tt::stl::json {
+
+nlohmann::json to_json_t<CoreCoord>::operator()(const CoreCoord &core_coord) noexcept {
+    return {{"x", to_json(core_coord.x)}, {"y", to_json(core_coord.y)}};
+}
+
+CoreCoord from_json_t<CoreCoord>::operator()(const nlohmann::json &json) noexcept {
+    return {from_json<uint32_t>(json.at("x")), from_json<uint32_t>(json.at("y"))};
+}
+
+nlohmann::json to_json_t<RelativeCoreCoord>::operator()(const RelativeCoreCoord &relative_core_coord) noexcept {
+    return {{"x", to_json(relative_core_coord.x)}, {"y", to_json(relative_core_coord.y)}};
+}
+
+RelativeCoreCoord from_json_t<RelativeCoreCoord>::operator()(const nlohmann::json &json) noexcept {
+    return {from_json<int32_t>(json.at("x")), from_json<int32_t>(json.at("y"))};
+}
+
+nlohmann::json to_json_t<CoreRange>::operator()(const CoreRange &core_range) noexcept {
+    return {{"start", to_json(core_range.start_coord)}, {"end", to_json(core_range.end_coord)}};
+}
+
+CoreRange from_json_t<CoreRange>::operator()(const nlohmann::json &json) noexcept {
+    return {from_json<CoreCoord>(json.at("start")), from_json<CoreCoord>(json.at("end"))};
+}
+
+nlohmann::json to_json_t<CoreRangeSet>::operator()(const CoreRangeSet &core_range_set) noexcept {
+    nlohmann::json core_range_set_json = nlohmann::json::array();
+    return to_json(core_range_set.ranges());
+}
+
+CoreRangeSet from_json_t<CoreRangeSet>::operator()(const nlohmann::json &json) noexcept {
+    return CoreRangeSet(from_json<std::vector<CoreRange>>(json));
+}
+
+}  // namespace tt::stl::json
diff --git a/tt_metal/common/core_coord.h b/tt_metal/common/core_coord.h
index 448ef85edb1..d5623836152 100644
--- a/tt_metal/common/core_coord.h
+++ b/tt_metal/common/core_coord.h
@@ -5,31 +5,23 @@
 #pragma once
 
 #include <algorithm>
-#include <limits>
 #include <mutex>
 #include <optional>
 #include <set>
 #include <string>
+#include <vector>
 
 #include "third_party/json/json.hpp"
 #include "third_party/umd/device/tt_xy_pair.h"
-#include "tt_metal/common/assert.hpp"
-#include "tt_metal/third_party/tracy/public/tracy/Tracy.hpp"
 #include "tt_metal/tt_stl/reflection.hpp"
 
-using std::pair;
-
 using CoreCoord = tt_xy_pair;
 
 template <>
 struct fmt::formatter<CoreCoord> {
     constexpr auto parse(format_parse_context &ctx) -> format_parse_context::iterator { return ctx.end(); }
 
-    auto format(const CoreCoord &core_coord, format_context &ctx) const -> format_context::iterator {
-        std::stringstream ss;
-        ss << core_coord.str();
-        return fmt::format_to(ctx.out(), "{}", ss.str());
-    }
+    auto format(const CoreCoord &core_coord, format_context &ctx) const -> format_context::iterator;
 };
 
 constexpr inline bool operator<=(const CoreCoord &a, const CoreCoord &b) { return (a < b) or (a == b); }
@@ -38,7 +30,7 @@ struct RelativeCoreCoord {
     long x = 0;
     long y = 0;
 
-    std::string str() const { return "(x=" + std::to_string(x) + ",y=" + std::to_string(y) + ")"; }
+    std::string str() const;
 };
 
 constexpr inline bool operator==(const RelativeCoreCoord &a, const RelativeCoreCoord &b) {
@@ -47,590 +39,217 @@ constexpr inline bool operator==(const RelativeCoreCoord &a, const RelativeCoreC
 
 constexpr inline bool operator!=(const RelativeCoreCoord &a, const RelativeCoreCoord &b) { return !(a == b); }
 
-namespace std {
-template <>
-struct hash<RelativeCoreCoord> {
-    std::size_t operator()(RelativeCoreCoord const &o) const {
-        std::size_t seed = 0;
-        seed = std::hash<std::size_t>()(o.x) ^ std::hash<std::size_t>()(o.y) << 1;
-        return seed;
-    }
-};
-}  // namespace std
-
-inline CoreCoord get_core_coord_from_relative(const RelativeCoreCoord &in, const CoreCoord &grid_size) {
-    CoreCoord coord;
-    coord.x = in.x + ((in.x < 0) ? grid_size.x : 0);
-    coord.y = in.y + ((in.y < 0) ? grid_size.y : 0);
-    return coord;
-}
+CoreCoord get_core_coord_from_relative(const RelativeCoreCoord &in, const CoreCoord &grid_size);
 
 struct CoreRange {
     CoreCoord start_coord;
     CoreCoord end_coord;
-    CoreRange(const CoreCoord &point) {
-        this->start_coord = point;
-        this->end_coord = point;
-    }
+    CoreRange(const CoreCoord &point);
 
-    CoreRange(const CoreCoord &start_coord, const CoreCoord &end_coord) {
-        TT_ASSERT(
-            end_coord.x >= start_coord.x and end_coord.y >= start_coord.y,
-            "Invalid core range for start_coord: {}, end_coord: {}", start_coord.str(), end_coord.str());
-
-        this->start_coord = start_coord;
-        this->end_coord = end_coord;
-    }
+    CoreRange(const CoreCoord &start_coord, const CoreCoord &end_coord);
 
     CoreRange(const CoreRange &other) = default;
     CoreRange &operator=(const CoreRange &other) = default;
     CoreRange(CoreRange &&other) = default;
     CoreRange &operator=(CoreRange &&other) = default;
 
-    // void validate() {
-    //     TT_FATAL(
-    //         end_coord.x >= start_coord.x and end_coord.y >= start_coord.y,
-    //         "Invalid core range for start_coord: {}, end_coord: {}", start_coord.str(), end_coord.str());
-    // }
-
-    inline std::optional<CoreRange> intersects(const CoreRange &other) const {
-        std::size_t x1 = std::max(this->start_coord.x, other.start_coord.x);
-        std::size_t y1 = std::max(this->start_coord.y, other.start_coord.y);
-        std::size_t x2 = std::min(this->end_coord.x, other.end_coord.x);
-        std::size_t y2 = std::min(this->end_coord.y, other.end_coord.y);
-        if (x1 <= x2 and y1 <= y2)
-            return CoreRange({x1, y1}, {x2, y2});
-
-        return {};
-    }
-
-    inline bool adjacent(const CoreRange &other) const {
-        std::size_t x1 = std::max(this->start_coord.x, other.start_coord.x);
-        std::size_t y1 = std::max(this->start_coord.y, other.start_coord.y);
-        std::size_t x2 = std::min(this->end_coord.x, other.end_coord.x);
-        std::size_t y2 = std::min(this->end_coord.y, other.end_coord.y);
-        return ((x2 + 1 == x1 && y1 <= y2) || (y2 + 1 == y1 && x1 <= x2));
-    }
-
-    inline bool contains(const CoreRange &other) const {
-        return (other.start_coord.x >= this->start_coord.x) && (other.end_coord.x <= this->end_coord.x) && (other.start_coord.y >= this->start_coord.y) &&
-               (other.end_coord.y <= this->end_coord.y);
-    }
-
-    inline bool contains(const CoreCoord &other) const {
-        return (other.x >= this->start_coord.x) && (other.x <= this->end_coord.x) && (other.y >= this->start_coord.y) &&
-               (other.y <= this->end_coord.y);
-    }
+    std::optional<CoreRange> intersects(const CoreRange &other) const;
+
+    bool adjacent(const CoreRange &other) const;
+
+    bool contains(const CoreRange &other) const;
+
+    bool contains(const CoreCoord &other) const;
 
     // Merge lined-up (in x or y dimension) intersecting/adjacent rectangles
-    std::optional<CoreRange> merge(const CoreRange &cr) const {
-        if (this->intersects(cr) || this->adjacent(cr)) {
-            if (this->start_coord.x == cr.start_coord.x && this->end_coord.x == cr.end_coord.x)
-                return CoreRange(
-                    {this->start_coord.x, std::min(this->start_coord.y, cr.start_coord.y)},
-                    {this->end_coord.x, std::max(this->end_coord.y, cr.end_coord.y)});
-
-            else if (this->start_coord.y == cr.start_coord.y && this->end_coord.y == cr.end_coord.y)
-                return CoreRange(
-                    {std::min(this->start_coord.x, cr.start_coord.x), this->start_coord.y},
-                    {std::max(this->end_coord.x, cr.end_coord.x), this->end_coord.y});
-        }
-        return std::nullopt;
-    }
-
-    std::string str() const { return "[" + this->start_coord.str() + " - " + this->end_coord.str() + "]"; }
-
-    size_t size() const { return (this->end_coord.x - this->start_coord.x + 1) * (this->end_coord.y - this->start_coord.y + 1); }
-
-    CoreCoord grid_size() const { return {this->end_coord.x - this->start_coord.x + 1, this->end_coord.y - this->start_coord.y + 1}; }
-
-    class CoreIterator
-    {
-    public:
-        CoreIterator(const CoreCoord& current, const CoreRange& core_range) :
-            current_(current),
-            range_(core_range)
-        {}
-
-        CoreCoord& operator*()
-        {
-            return current_;
-        }
-
-        CoreIterator& operator++()
-        {
-            CoreCoord next;
-
-            const bool is_curr_core_at_end_of_row = current_.x == range_.end_coord.x;
-            if (is_curr_core_at_end_of_row)
-            {
-                // Go to the beginning of the next row
-                next.x = range_.start_coord.x;
-                next.y = current_.y + 1;
-            }
-            else
-            {
-                next.x = current_.x + 1;
-                next.y = current_.y;
-            }
-
-            current_ = next;
-            return *this;
-        }
-
-        bool operator==(const CoreIterator& other) const
-        {
-            return current_ == other.current_;
-        }
-
-        bool operator!=(const CoreIterator& other) const
-        {
-            return !(current_ == other.current_);
-        }
-
-    private:
+    std::optional<CoreRange> merge(const CoreRange &cr) const;
+
+    std::string str() const;
+
+    size_t size() const;
+
+    CoreCoord grid_size() const;
+
+    class CoreIterator {
+       public:
+        CoreIterator(const CoreCoord &current, const CoreRange &core_range);
+
+        CoreCoord &operator*();
+
+        CoreIterator &operator++();
+
+        bool operator==(const CoreIterator &other) const;
+
+        bool operator!=(const CoreIterator &other) const;
+
+       private:
         CoreCoord current_;
-        const CoreRange& range_;
+        const CoreRange &range_;
     };
 
-    CoreIterator begin() const
-    {
-        return CoreIterator(this->start_coord, *this);
-    }
+    CoreIterator begin() const;
 
-    CoreIterator end() const
-    {
-        const CoreCoord iterator_end(this->start_coord.x, this->end_coord.y + 1);
-        return CoreIterator(iterator_end, *this);
-    }
+    CoreIterator end() const;
 };
 
-constexpr inline bool operator==(const CoreRange &a, const CoreRange &b) {
+constexpr bool operator==(const CoreRange &a, const CoreRange &b) {
     return a.start_coord == b.start_coord && a.end_coord == b.end_coord;
 }
 
-constexpr inline bool operator!=(const CoreRange &a, const CoreRange &b) { return !(a == b); }
+constexpr bool operator!=(const CoreRange &a, const CoreRange &b) { return !(a == b); }
 
-constexpr inline bool operator<(const CoreRange &left, const CoreRange &right) {
-    return (left.start_coord < right.start_coord || (left.start_coord == right.start_coord && left.end_coord < right.end_coord));
+constexpr bool operator<(const CoreRange &left, const CoreRange &right) {
+    return (
+        left.start_coord < right.start_coord ||
+        (left.start_coord == right.start_coord && left.end_coord < right.end_coord));
 }
 
 template <>
 struct fmt::formatter<CoreRange> {
     constexpr auto parse(format_parse_context &ctx) -> format_parse_context::iterator { return ctx.end(); }
 
-    auto format(const CoreRange &core_range, format_context &ctx) const -> format_context::iterator {
-        std::stringstream ss;
-        ss << core_range.str();
-        return fmt::format_to(ctx.out(), "{}", ss.str());
-    }
+    auto format(const CoreRange &core_range, format_context &ctx) const -> format_context::iterator;
 };
 
-namespace std {
-template <>
-struct hash<CoreRange> {
-    std::size_t operator()(const CoreRange &core_range) const {
-        std::size_t seed = 0;
-        seed = std::hash<CoreCoord>{}(core_range.start_coord) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-        seed = std::hash<CoreCoord>{}(core_range.end_coord) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-        return seed;
-    }
-};
-}  // namespace std
-
 class CoreRangeSet {
    public:
-    CoreRangeSet(const std::set<CoreRange> &core_ranges) : ranges_(core_ranges) {
-        ZoneScoped;
-        for (auto outer_it = this->ranges_.begin(); outer_it != this->ranges_.end(); outer_it++) {
-            for (auto inner_it = this->ranges_.begin(); inner_it != this->ranges_.end(); inner_it++) {
-                if (outer_it == inner_it) {
-                    continue;
-                }
-                CoreRange first_core_range = *outer_it;
-                CoreRange second_core_range = *inner_it;
-                bool first_core_left_of_second = first_core_range.end_coord.x < second_core_range.start_coord.x;
-                bool first_core_right_of_second = first_core_range.start_coord.x > second_core_range.end_coord.x;
-                bool first_core_above_second = first_core_range.end_coord.y < second_core_range.start_coord.y;
-                bool first_core_below_second = first_core_range.start_coord.y > second_core_range.end_coord.y;
-                auto no_overlap = first_core_left_of_second or first_core_right_of_second or first_core_above_second or
-                                  first_core_below_second;
-                if (not no_overlap) {
-                    TT_THROW(
-                        "Cannot create CoreRangeSet with specified core ranges because core ranges {} and {} overlap!",
-                        first_core_range.str(),
-                        second_core_range.str());
-                }
-            }
-        }
-    }
-
-    friend void swap(CoreRangeSet& first, CoreRangeSet& second) {
-        std::scoped_lock lock(first.ranges_guard, second.ranges_guard);
-        std::swap(first.ranges_, second.ranges_);
-    }
-
-    CoreRangeSet(const CoreRangeSet &other) {
-        std::scoped_lock lock(other.ranges_guard);
-        this->ranges_ = other.ranges_;
-    }
-    CoreRangeSet &operator=(const CoreRangeSet &other) {
-        std::scoped_lock lock(other.ranges_guard);
-        this->ranges_ = other.ranges_;
-        return *this;
-    }
-
-    CoreRangeSet(CoreRangeSet &&other) {
-        swap(*this, other);
-    }
-
-    CoreRangeSet &operator=(CoreRangeSet &&other) {;
-        swap(*this, other);
-        return *this;
-    }
-
-    auto size() const { return ranges_.size(); }
-
-    CoreRangeSet merge(const std::set<CoreRange> &other) const {
-        size_t min_x = std::numeric_limits<size_t>::max(), max_x = 0, min_y = std::numeric_limits<size_t>::max(),
-               max_y = 0;
-        std::set<CoreRange> crs = this->ranges_;
-        crs.insert(other.begin(), other.end());
-
-        for (const auto &cr : crs) {
-            // std::cout << "merging " << cr.str() << std::endl;
-            min_x = std::min(min_x, cr.start_coord.x);
-            max_x = std::max(max_x, cr.end_coord.x);
-            min_y = std::min(min_y, cr.start_coord.y);
-            max_y = std::max(max_y, cr.end_coord.y);
-        }
-
-        // By overallocating by one x entry, we can avoid needing to check for
-        // boundary conditions when iterating, since there'll always be one
-        // last false entry
-        bool grid[max_y + 1][max_x + 2];
-        memset(grid, 0, sizeof(grid));
-
-        for (const auto &cr : crs)
-            for (unsigned y = cr.start_coord.y; y <= cr.end_coord.y; y++)
-                for (unsigned x = cr.start_coord.x; x <= cr.end_coord.x; x++) grid[y][x] = true;
-
-        crs.clear();
-        for (unsigned y = min_y; y <= max_y; y++) {
-            std::set<CoreRange> filter_set, tmp, new_crs;
-            std::vector<CoreRange> ranges;
-            for (unsigned x = min_x; x <= max_x + 1; x++) {
-                if (grid[y][x]) {
-                    unsigned x_start = x;
-                    while (grid[y][x]) x++;
-                    ranges.push_back(CoreRange({x_start, y}, {x - 1, y}));
-                }
-            }
-
-            for (const auto &cr : ranges) {
-                for (const auto &prev_cr : crs) {
-                    if (auto merged = cr.merge(prev_cr)) {
-                        // std::cout << "merging " << cr.str() << " and " << prev_cr.str() << " with " <<
-                        // merged.value().str() << std::endl;
-                        new_crs.insert(merged.value());
-                        filter_set.insert(prev_cr);
-                        filter_set.insert(cr);
-                    }
-                }
-                crs.insert(cr);
-            }
-            // Set(A) = Set(A) - Set(B)
-            std::set_difference(
-                std::make_move_iterator(crs.begin()),
-                std::make_move_iterator(crs.end()),
-                filter_set.begin(),
-                filter_set.end(),
-                std::inserter(tmp, tmp.end()));
-            crs.swap(tmp);
-            crs.insert(new_crs.begin(), new_crs.end());
-        }
-        // for ( const auto & cr : crs ){
-        //   std::cout << " final merged CR:" << cr.str() << std::endl;
-        // }
-        return CoreRangeSet(crs);
-    }
-
-    CoreRangeSet merge(const CoreRangeSet &s) const { return this->merge(s.ranges()); }
-
-    inline bool core_coord_in_core_ranges(const CoreCoord &core_coord) const {
-        ZoneScoped;
-        for (const auto &cr : this->ranges_) {
-            if (cr.contains(core_coord))
-                return true;
-        }
-        return false;
-    }
-
-    inline bool intersects(const CoreRange &cr) const {
-        for (const auto &local_cr : this->ranges_) {
-            if (local_cr.intersects(cr))
-                return true;
-        }
-        return false;
-    }
-
-    const std::set<CoreRange> &ranges() const { return this->ranges_; }
-
-    std::string str() const {
-        if (this->ranges().size() > 0) {
-            std::string core_range_set_str = "{";
-            for (const auto &core_range : this->ranges_) {
-                core_range_set_str += core_range.str() + ", ";
-            }
-            core_range_set_str[core_range_set_str.length() - 2] = '}';
-            core_range_set_str.pop_back();
-            return core_range_set_str;
-        } else {
-            return "{}";
-        }
-    }
-
-    const uint32_t num_cores() const {
-        uint32_t num_cores = 0;
-        for (const auto &core_range : this->ranges()) {
-            num_cores += core_range.size();
-        }
-        return num_cores;
-    }
-
-    CoreRange bounding_box() const {
-        TT_FATAL(this->ranges().size() > 0, "Cannot get bounding_box of an empty CoreRangeSet!");
-        size_t min_x = UINT32_MAX, min_y = UINT32_MAX, max_x = 0, max_y = 0;
-        for (const auto &cr : this->ranges()) {
-            min_x = std::min(min_x, cr.start_coord.x);
-            max_x = std::max(max_x, cr.end_coord.x);
-            min_y = std::min(min_y, cr.start_coord.y);
-            max_y = std::max(max_y, cr.end_coord.y);
-        }
-        return {{min_x, min_y}, {max_x, max_y}};
-    }
-
-    private:
-     mutable std::mutex ranges_guard;
-     std::set<CoreRange> ranges_;
+    CoreRangeSet(const std::vector<CoreRange> &core_ranges);
+
+    CoreRangeSet(const std::set<CoreRange> &core_ranges);
+
+    CoreRangeSet(const CoreRange &core_range);
+
+    CoreRangeSet() = default;
+
+    friend void swap(CoreRangeSet &first, CoreRangeSet &second);
+
+    CoreRangeSet(const CoreRangeSet &other);
+
+    CoreRangeSet &operator=(const CoreRangeSet &other);
+
+    CoreRangeSet(CoreRangeSet &&other);
+
+    CoreRangeSet &operator=(CoreRangeSet &&other);
+
+    CoreRangeSet(std::vector<CoreRange> &&core_ranges);
+
+    size_t size() const;
+
+    template <typename T>
+    CoreRangeSet merge(const T &other) const;
+
+    bool core_coord_in_core_ranges(const CoreCoord &core_coord) const;
+
+    bool intersects(const CoreRange &cr) const;
+
+    const std::vector<CoreRange> &ranges() const;
+
+    std::string str() const;
+
+    uint32_t num_cores() const;
+
+    CoreRange bounding_box() const;
+
+   private:
+    void validate_no_overlap();
+
+    mutable std::mutex ranges_guard;
+    std::vector<CoreRange> ranges_;
 };
 
-const inline bool operator==(const CoreRangeSet &a, const CoreRangeSet &b) {
-    if (a.ranges().size() == b.ranges().size()) {
-        auto range_a = a.ranges();
-        auto range_b = b.ranges();
-        for (auto it_a = range_a.begin(), it_b = range_b.begin(); it_a != range_a.end(); it_a++, it_b++) {
-            if (*it_a != *it_b) {
-                return false;
-            }
-        }
-        return true;
-    }
-    return false;
-}
+bool operator==(const CoreRangeSet &a, const CoreRangeSet &b);
 
-inline std::vector<CoreCoord> grid_to_cores(
-    uint32_t num_cores, uint32_t grid_size_x, uint32_t grid_size_y, bool row_wise = false) {
-    std::vector<CoreCoord> cores;
-    cores.reserve(num_cores);
-    TT_ASSERT(
-        num_cores <= grid_size_x * grid_size_y,
-        "Number of cores {} exceeds grid size {}x{}",
-        num_cores,
-        grid_size_x,
-        grid_size_y);
-    if (row_wise) {
-        for (uint32_t i = 0; i < num_cores; ++i) {
-            cores.push_back({i % grid_size_x, i / grid_size_x});
-        }
-    } else {
-        for (uint32_t i = 0; i < num_cores; ++i) {
-            cores.push_back({i / grid_size_y, i % grid_size_y});
-        }
-    }
-    return cores;
-}
+std::vector<CoreCoord> grid_to_cores(
+    uint32_t num_cores, uint32_t grid_size_x, uint32_t grid_size_y, bool row_wise = false);
 
-inline std::vector<CoreCoord> grid_to_cores(CoreCoord start, CoreCoord end, bool row_wise = false) {
-    std::vector<CoreCoord> cores;
-    auto num_cores_x = (end.x + 1) - start.x;
-    auto num_cores_y = (end.y + 1) - start.y;
-    uint32_t num_cores = num_cores_x * num_cores_y;
-    cores.reserve(num_cores);
-    if (row_wise) {
-        for (uint32_t j = start.y; j < (end.y + 1); j++) {
-            for (uint32_t i = start.x; i < (end.x + 1); i++) {
-                cores.push_back({i, j});
-            }
-        }
-
-    } else {
-        for (uint32_t i = start.x; i < (end.x + 1); i++) {
-            for (uint32_t j = start.y; j < (end.y + 1); j++) {
-                cores.push_back({i, j});
-            }
-        }
-    }
-    return cores;
-}
+std::vector<CoreCoord> grid_to_cores(CoreCoord start, CoreCoord end, bool row_wise = false);
 
 // Noop cores are appended at the end with no guarantees on ordering
-inline std::vector<CoreCoord> grid_to_cores_with_noop(
+std::vector<CoreCoord> grid_to_cores_with_noop(
     const uint32_t bbox_x,
     const uint32_t bbox_y,
     const uint32_t grid_size_x,
     const uint32_t grid_size_y,
-    const bool row_wise = false) {
-    ZoneScoped;
-    std::vector<CoreCoord> cores;
-    cores.reserve(grid_size_x * grid_size_y);
-    TT_ASSERT(bbox_x < grid_size_x);
-    TT_ASSERT(bbox_y < grid_size_y);
-    const uint32_t box_size_x = bbox_x + 1;
-    const uint32_t box_size_y = bbox_y + 1;
-
-    if (row_wise) {
-        for (uint32_t i = 0; i < box_size_x * box_size_y; ++i) {
-            cores.push_back({i % box_size_x, i / box_size_x});
-        }
-    } else {
-        for (uint32_t i = 0; i < box_size_x * box_size_y; ++i) {
-            cores.push_back({i / box_size_y, i % box_size_y});
-        }
-    }
-
-    // Right rectangle noops
-    for (uint32_t x = box_size_x; x < grid_size_x; ++x) {
-        for (uint32_t y = 0; y < grid_size_y; ++y) {
-            cores.push_back({x, y});
-        }
-    }
-
-    // Bottom rectangle noops
-    for (uint32_t y = box_size_y; y < grid_size_y; ++y) {
-        for (uint32_t x = 0; x < box_size_x; ++x) {
-            cores.push_back({x, y});
-        }
-    }
-
-    return cores;
-}
+    const bool row_wise = false);
 
-inline std::vector<CoreCoord> corerange_to_cores(
-    const CoreRangeSet &crs, std::optional<uint32_t> max_cores = std::nullopt, bool row_wise = false) {
-    uint32_t num_total_cores = 0;
-    std::vector<CoreCoord> all_cores;
-    uint32_t offset = 0;
-
-    for (auto core_range : crs.ranges()) {
-        auto start_coord = core_range.start_coord;
-        auto end_coord = core_range.end_coord;
-        auto cores = grid_to_cores(start_coord, end_coord, row_wise);
-        if (max_cores.has_value()) {
-            if (all_cores.size() + cores.size() > max_cores.value()) {
-                uint32_t num_cores_to_add = max_cores.value() - all_cores.size();
-                all_cores.insert(all_cores.end(), cores.begin(), cores.begin() + num_cores_to_add);
-            } else {
-                all_cores.insert(all_cores.end(), cores.begin(), cores.end());
-            }
-        } else {
-            all_cores.insert(all_cores.end(), cores.begin(), cores.end());
-        }
-    }
-
-    return all_cores;
-}
+std::vector<CoreCoord> corerange_to_cores(
+    const CoreRangeSet &crs, std::optional<uint32_t> max_cores = std::nullopt, bool row_wise = false);
 
-const inline bool operator!=(const CoreRangeSet &a, const CoreRangeSet &b) { return !(a == b); }
+bool operator!=(const CoreRangeSet &a, const CoreRangeSet &b);
 
 template <>
 struct fmt::formatter<CoreRangeSet> {
     constexpr auto parse(format_parse_context &ctx) -> format_parse_context::iterator { return ctx.end(); }
 
-    auto format(const CoreRangeSet &core_range_set, format_context &ctx) const -> format_context::iterator {
-        std::stringstream ss;
-        ss << core_range_set.str();
-        return fmt::format_to(ctx.out(), "{}", ss.str());
-    }
+    auto format(const CoreRangeSet &core_range_set, format_context &ctx) const -> format_context::iterator;
 };
 
 // Adding to tt::tt_metal namespace as we transition to moving this out of global namespace eventually.
 namespace tt::tt_metal {
-   using ::CoreCoord;
-   using ::CoreRange;
-   using ::CoreRangeSet;
-}
+using ::CoreCoord;
+using ::CoreRange;
+using ::CoreRangeSet;
+}  // namespace tt::tt_metal
 
 namespace std {
+
+template <>
+struct hash<CoreRange> {
+    std::size_t operator()(const CoreRange &core_range) const;
+};
+
+template <>
+struct hash<RelativeCoreCoord> {
+    std::size_t operator()(RelativeCoreCoord const &o) const;
+};
+
 template <>
 struct hash<CoreRangeSet> {
-    std::size_t operator()(const CoreRangeSet &core_range_set) const {
-        std::size_t seed = 0;
-        for (const auto &core_range : core_range_set.ranges()) {
-            seed = std::hash<CoreRange>{}(core_range) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-        }
-        return seed;
-    }
+    std::size_t operator()(const CoreRangeSet &core_range_set) const;
 };
+
 }  // namespace std
 
 namespace tt::stl::json {
 
 template <>
 struct to_json_t<CoreCoord> {
-    nlohmann::json operator()(const CoreCoord &core_coord) noexcept {
-        return {{"x", to_json(core_coord.x)}, {"y", to_json(core_coord.y)}};
-    }
+    nlohmann::json operator()(const CoreCoord &core_coord) noexcept;
 };
 
 template <>
 struct from_json_t<CoreCoord> {
-    CoreCoord operator()(const nlohmann::json &json) noexcept {
-        return {from_json<uint32_t>(json.at("x")), from_json<uint32_t>(json.at("y"))};
-    }
+    CoreCoord operator()(const nlohmann::json &json) noexcept;
 };
 
 template <>
 struct to_json_t<RelativeCoreCoord> {
-    nlohmann::json operator()(const RelativeCoreCoord &relative_core_coord) noexcept {
-        return {{"x", to_json(relative_core_coord.x)}, {"y", to_json(relative_core_coord.y)}};
-    }
+    nlohmann::json operator()(const RelativeCoreCoord &relative_core_coord) noexcept;
 };
 
 template <>
 struct from_json_t<RelativeCoreCoord> {
-    RelativeCoreCoord operator()(const nlohmann::json &json) noexcept {
-        return {from_json<int32_t>(json.at("x")), from_json<int32_t>(json.at("y"))};
-    }
+    RelativeCoreCoord operator()(const nlohmann::json &json) noexcept;
 };
 
 template <>
 struct to_json_t<CoreRange> {
-    nlohmann::json operator()(const CoreRange &core_range) noexcept {
-        return {{"start", to_json(core_range.start_coord)}, {"end", to_json(core_range.end_coord)}};
-    }
+    nlohmann::json operator()(const CoreRange &core_range) noexcept;
 };
 
 template <>
 struct from_json_t<CoreRange> {
-    CoreRange operator()(const nlohmann::json &json) noexcept {
-        return {from_json<CoreCoord>(json.at("start")), from_json<CoreCoord>(json.at("end"))};
-    }
+    CoreRange operator()(const nlohmann::json &json) noexcept;
 };
 
 template <>
 struct to_json_t<CoreRangeSet> {
-    nlohmann::json operator()(const CoreRangeSet &core_range_set) noexcept {
-        nlohmann::json core_range_set_json = nlohmann::json::array();
-        return to_json(core_range_set.ranges());
-    }
+    nlohmann::json operator()(const CoreRangeSet &core_range_set) noexcept;
 };
 
 template <>
 struct from_json_t<CoreRangeSet> {
-    CoreRangeSet operator()(const nlohmann::json &json) noexcept {
-        return CoreRangeSet(from_json<std::set<CoreRange>>(json));
-    }
+    CoreRangeSet operator()(const nlohmann::json &json) noexcept;
 };
 
 }  // namespace tt::stl::json
diff --git a/tt_metal/common/work_split.hpp b/tt_metal/common/work_split.hpp
index c4c3153cc59..c6c6933a5ed 100644
--- a/tt_metal/common/work_split.hpp
+++ b/tt_metal/common/work_split.hpp
@@ -174,7 +174,7 @@ inline std::tuple<uint32_t, CoreRangeSet, CoreRangeSet, CoreRangeSet, uint32_t,
     uint32_t units_per_core_group_2 = 0;
     // Evenly divided units to all target cores
     if (units_to_divide % target_num_cores == 0) {
-        core_group_1_set = all_cores.ranges();
+        core_group_1_set = std::set<CoreRange>(all_cores.ranges().begin(), all_cores.ranges().end());
         // Uneven division of units across cores
         // This case should only be hit when there are more units of work than a full grid of cores
         // which is implicitly assumed in the following logic
diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp
index 01aae5f3130..426e0b6fcab 100644
--- a/tt_metal/impl/device/device.cpp
+++ b/tt_metal/impl/device/device.cpp
@@ -3218,7 +3218,7 @@ float Device::sfpu_inf() const{
     return std::numeric_limits<float>::infinity();
 }
 
-pair<int, int> Device::build_processor_type_to_index(uint32_t programmable_core, uint32_t processor_class) const {
+std::pair<int, int> Device::build_processor_type_to_index(uint32_t programmable_core, uint32_t processor_class) const {
     TT_ASSERT(programmable_core < this->build_state_indices_.size(),
         "Programmable core type {} is not included in the FW or Kernel build state", programmable_core);
     TT_ASSERT(processor_class < this->build_state_indices_[programmable_core].size(),
@@ -3236,7 +3236,7 @@ const JitBuildState& Device::build_kernel_state(uint32_t programmable_core, uint
 }
 
 const JitBuildStateSubset Device::build_kernel_states(uint32_t programmable_core, uint32_t processor_class) const {
-    pair<int, int> bptti = build_processor_type_to_index(programmable_core, processor_class);
+    std::pair<int, int> bptti = build_processor_type_to_index(programmable_core, processor_class);
     JitBuildStateSubset subset = {
         &this->kernel_build_states_[bptti.first],
         bptti.second
diff --git a/tt_metal/impl/device/device.hpp b/tt_metal/impl/device/device.hpp
index 01e1f104596..5ee266d37b7 100644
--- a/tt_metal/impl/device/device.hpp
+++ b/tt_metal/impl/device/device.hpp
@@ -6,6 +6,7 @@
 
 #include <memory>
 #include <mutex>
+#include <utility>
 
 #include "hostdevcommon/common_values.hpp"
 #include "impl/dispatch/work_executor.hpp"
@@ -334,7 +335,7 @@ class Device {
     T get_base_allocator_addr(const HalMemType &mem_type) const;
 
     template <typename CoreRangeContainer>
-    std::vector<pair<transfer_info_cores, uint32_t>> extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type);
+    std::vector<std::pair<transfer_info_cores, uint32_t>> extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type);
     bool dispatch_s_enabled() const;
     bool distributed_dispatcher() const;
 
@@ -374,9 +375,9 @@ inline T Device::get_base_allocator_addr(const HalMemType &mem_type) const {
 
 // TODO: Find a better home for this function
 template <typename CoreRangeContainer>
-std::vector<pair<transfer_info_cores, uint32_t>> Device::extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type) {
+std::vector<std::pair<transfer_info_cores, uint32_t>> Device::extract_dst_noc_multicast_info(const CoreRangeContainer& ranges, const CoreType core_type) {
     // This API extracts all the pairs of noc multicast encodings given a set of core ranges
-    std::vector<pair<transfer_info_cores, uint32_t>> dst_noc_multicast_info;
+    std::vector<std::pair<transfer_info_cores, uint32_t>> dst_noc_multicast_info;
     dst_noc_multicast_info.reserve(ranges.size());
     for (const CoreRange& core_range : ranges) {
         CoreCoord physical_start = this->physical_core_from_logical_core(core_range.start_coord, core_type);
diff --git a/tt_metal/impl/dispatch/command_queue.cpp b/tt_metal/impl/dispatch/command_queue.cpp
index c54df2dddbe..e52670220ff 100644
--- a/tt_metal/impl/dispatch/command_queue.cpp
+++ b/tt_metal/impl/dispatch/command_queue.cpp
@@ -663,7 +663,7 @@ void EnqueueProgramCommand::assemble_runtime_args_commands(ProgramCommandSequenc
                                 .noc_xy_addr = this->device->get_noc_unicast_encoding(this->noc_index, physical_core)});
                         }
                     } else {
-                        vector<pair<transfer_info_cores, uint32_t>> dst_noc_multicast_info =
+                        vector<std::pair<transfer_info_cores, uint32_t>> dst_noc_multicast_info =
                             device->extract_dst_noc_multicast_info<std::vector<CoreRange>>(
                                 kernel->logical_coreranges(), core_type);
                         common_sub_cmds.emplace<std::vector<CQDispatchWritePackedMulticastSubCmd>>(
diff --git a/tt_metal/impl/dispatch/data_collection.cpp b/tt_metal/impl/dispatch/data_collection.cpp
index 516f7a27912..87c055f690c 100644
--- a/tt_metal/impl/dispatch/data_collection.cpp
+++ b/tt_metal/impl/dispatch/data_collection.cpp
@@ -123,7 +123,7 @@ class DataCollector {
 
 private:
     map<uint64_t, vector<DispatchData>> program_id_to_dispatch_data;
-    map<uint64_t, map<CoreType, vector<pair<kernel_id_array_t, CoreRangeSet>>>> program_id_to_kernel_groups;
+    map<uint64_t, map<CoreType, vector<std::pair<kernel_id_array_t, CoreRangeSet>>>> program_id_to_kernel_groups;
     map<uint64_t, int> program_id_to_call_count;
 };
 
@@ -202,7 +202,7 @@ void DataCollector::DumpData() {
         // Dump kernel ids for each kernel group in this program
         for (auto &core_type_and_kernel_groups : program_id_to_kernel_groups[program_id]) {
             CoreType core_type = core_type_and_kernel_groups.first;
-            vector<pair<kernel_id_array_t, CoreRangeSet>> &kernel_groups = core_type_and_kernel_groups.second;
+            vector<std::pair<kernel_id_array_t, CoreRangeSet>> &kernel_groups = core_type_and_kernel_groups.second;
             outfile << fmt::format("\t{} Kernel Groups: {}\n", core_type, kernel_groups.size());
             for (auto &ids_and_ranges : kernel_groups) {
                 // Dump kernel ids in this group
diff --git a/tt_metal/impl/dispatch/debug_tools.cpp b/tt_metal/impl/dispatch/debug_tools.cpp
index 14ec9b706d3..f8c54fa3573 100644
--- a/tt_metal/impl/dispatch/debug_tools.cpp
+++ b/tt_metal/impl/dispatch/debug_tools.cpp
@@ -23,7 +23,7 @@ void match_device_program_data_with_host_program_data(const char* host_file, con
     host_dispatch_dump_file.open(host_file);
     device_dispatch_dump_file.open(device_file);
 
-    vector<pair<string, vector<string>>> host_map;
+    vector<std::pair<string, vector<string>>> host_map;
 
 
     string line;
diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp
index d67d0e26ce5..24fd34c42fb 100644
--- a/tt_metal/impl/program/program.cpp
+++ b/tt_metal/impl/program/program.cpp
@@ -107,8 +107,11 @@ void DisablePersistentKernelCache() { enable_persistent_kernel_cache = false; }
 std::atomic<uint64_t> Program::program_counter = 0;
 
 Program::Program() :
-    id(program_counter++), runtime_id(0), worker_crs_({}), local_circular_buffer_allocation_needed_(false), finalized_(false) {
-
+    id(program_counter++),
+    runtime_id(0),
+    worker_crs_(),
+    local_circular_buffer_allocation_needed_(false),
+    finalized_(false) {
     uint32_t programmable_core_count = hal.get_programmable_core_type_count();
     for (uint32_t i = 0; i < programmable_core_count; i++) {
         kernels_.push_back({});
@@ -146,7 +149,7 @@ std::shared_ptr<Kernel> Program::get_kernel(KernelHandle kernel_id) const {
     return nullptr;
 }
 
-KernelGroup::KernelGroup() : core_ranges({}) {}
+KernelGroup::KernelGroup() : core_ranges(CoreRangeSet()) {}
 
 KernelGroup::KernelGroup(
     const Program &program,
@@ -155,8 +158,7 @@ KernelGroup::KernelGroup(
     bool erisc_is_idle,
     int last_cb_index,
     const CoreRangeSet &new_ranges) :
-    core_ranges({}) {
-
+    core_ranges(CoreRangeSet()) {
     this->programmable_core_type_index = programmable_core_type_index;
     this->core_ranges = this->core_ranges.merge(new_ranges);
     this->kernel_ids = kernel_ids;
@@ -664,9 +666,9 @@ void Program::populate_dispatch_data(Device *device) {
     };
 
     auto extract_dst_noc_unicast_info =
-        [&device](const std::set<CoreRange> &ranges, const CoreType core_type) -> std::vector<pair<transfer_info_cores, uint32_t>> {
+        [&device](const auto &ranges, const CoreType core_type) -> std::vector<std::pair<transfer_info_cores, uint32_t>> {
         // This API extracts all the pairs of noc multicast encodings given a set of core ranges
-        vector<pair<transfer_info_cores, uint32_t>> dst_noc_unicast_info;
+        vector<std::pair<transfer_info_cores, uint32_t>> dst_noc_unicast_info;
         for (const CoreRange &core_range : ranges) {
             for (auto x = core_range.start_coord.x; x <= core_range.end_coord.x; x++) {
                 for (auto y = core_range.start_coord.y; y <= core_range.end_coord.y; y++) {
@@ -686,8 +688,8 @@ void Program::populate_dispatch_data(Device *device) {
         // TODO: use semaphore.core_type from main
         if (semaphore.core_type() == CoreType::WORKER) {
             uint32_t index = hal.get_programmable_core_type_index(HalProgrammableCoreType::TENSIX);
-            vector<pair<transfer_info_cores, uint32_t>> dst_noc_multicast_info =
-                device->extract_dst_noc_multicast_info<std::set<CoreRange>>(
+            vector<std::pair<transfer_info_cores, uint32_t>> dst_noc_multicast_info =
+                device->extract_dst_noc_multicast_info<std::vector<CoreRange>>(
                     semaphore.core_range_set().ranges(), CoreType::WORKER);
             transfer_info transfer_info = {
                 .dst_base_addr = semaphore.offset(),
@@ -698,7 +700,7 @@ void Program::populate_dispatch_data(Device *device) {
         } else if (semaphore.core_type() == CoreType::ETH) {
             // TODO: we only fast dispatch to active eth...
             uint32_t index = hal.get_programmable_core_type_index(HalProgrammableCoreType::ACTIVE_ETH);
-            vector<pair<transfer_info_cores, uint32_t>> dst_noc_unicast_info =
+            vector<std::pair<transfer_info_cores, uint32_t>> dst_noc_unicast_info =
                 extract_dst_noc_unicast_info(semaphore.core_range_set().ranges(), CoreType::ETH);
             transfer_info transfer_info = {
                 .dst_base_addr = semaphore.offset(),
@@ -796,8 +798,8 @@ void Program::populate_dispatch_data(Device *device) {
         for (KernelGroup &kernel_group : this->get_kernel_groups(index)) {
             // TODO: add a bit in the hal that says if this core type is unicast/multicast
             if (core_type == CoreType::WORKER) {
-                std::vector<pair<transfer_info_cores, uint32_t>> dst_noc_multicast_info =
-                    device->extract_dst_noc_multicast_info<std::set<CoreRange>>(
+                std::vector<std::pair<transfer_info_cores, uint32_t>> dst_noc_multicast_info =
+                    device->extract_dst_noc_multicast_info<std::vector<CoreRange>>(
                         kernel_group.core_ranges.ranges(), core_type);
 
                 vector<KernelHandle> kernel_ids;
@@ -815,7 +817,7 @@ void Program::populate_dispatch_data(Device *device) {
                 }
             } else {
                 TT_ASSERT(core_type == CoreType::ETH);
-                vector<pair<transfer_info_cores, uint32_t>> dst_noc_unicast_info =
+                vector<std::pair<transfer_info_cores, uint32_t>> dst_noc_unicast_info =
                     extract_dst_noc_unicast_info(kernel_group.core_ranges.ranges(), core_type);
 
                 vector<KernelHandle> kernel_ids;
diff --git a/tt_metal/llrt/tlb_config.cpp b/tt_metal/llrt/tlb_config.cpp
index c912d9b2a10..b3deadbc88b 100644
--- a/tt_metal/llrt/tlb_config.cpp
+++ b/tt_metal/llrt/tlb_config.cpp
@@ -7,6 +7,7 @@
 #include "third_party/umd/device/blackhole/blackhole_implementation.h"
 #include "third_party/umd/device/grayskull/grayskull_implementation.h"
 #include "third_party/umd/device/wormhole/wormhole_implementation.h"
+#include "tt_metal/common/assert.hpp"
 
 namespace ll_api {
 
diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp
index f3276b74a24..e057ae5da3f 100644
--- a/tt_metal/tt_metal.cpp
+++ b/tt_metal/tt_metal.cpp
@@ -40,10 +40,10 @@ CoreRangeSet GetCoreRangeSet(const std::variant<CoreCoord, CoreRange, CoreRangeS
         {
             using T = std::decay_t<decltype(core_spec)>;
             if constexpr (std::is_same_v<T, CoreCoord>) {
-                return CoreRangeSet({CoreRange(core_spec, core_spec)});
+                return CoreRangeSet(CoreRange(core_spec, core_spec));
             }
             else if constexpr (std::is_same_v<T, CoreRange>) {
-                return CoreRangeSet({core_spec});
+                return CoreRangeSet(core_spec);
             }
             else if constexpr (std::is_same_v<T, CoreRangeSet>) {
                 return core_spec;
@@ -1060,9 +1060,9 @@ uint32_t CreateSemaphore(
     return std::visit(
         [&](auto &&c) -> uint32_t {
             using T = std::decay_t<decltype(c)>;
-            CoreRangeSet crs({});
+            CoreRangeSet crs;
             if constexpr (std::is_same_v<T, CoreRange>) {
-                crs = CoreRangeSet({c});
+                crs = CoreRangeSet(c);
             } else {
                 crs = c;
             }
diff --git a/ttnn/cpp/ttnn/operations/ccl/reduce_scatter/device/host/reduce_scatter_full_worker_grid.cpp b/ttnn/cpp/ttnn/operations/ccl/reduce_scatter/device/host/reduce_scatter_full_worker_grid.cpp
index 7c8cbda4539..06cd1494dd2 100644
--- a/ttnn/cpp/ttnn/operations/ccl/reduce_scatter/device/host/reduce_scatter_full_worker_grid.cpp
+++ b/ttnn/cpp/ttnn/operations/ccl/reduce_scatter/device/host/reduce_scatter_full_worker_grid.cpp
@@ -336,8 +336,10 @@ static std::pair<CoreRangeSet, std::optional<CoreRangeSet>> select_worker_cores_
 
     TT_ASSERT(num_edm_channels % 2 == 0, "For line topologies, we expect a multiple of 2 number of channels for the algorithm and worker kernels to work.");
     const std::size_t workers_per_direction = num_edm_channels / num_directions_per_line;
-    auto const& lower_half_of_cores = CoreRangeSet({CoreRange(CoreCoord(0, 0), CoreCoord(workers_per_direction - 1, num_links - 1))});
-    auto const& upper_half_of_cores = CoreRangeSet({CoreRange(CoreCoord(workers_per_direction, 0), CoreCoord(num_edm_channels - 1, num_links - 1))});
+    auto const& lower_half_of_cores =
+        CoreRangeSet(CoreRange(CoreCoord(0, 0), CoreCoord(workers_per_direction - 1, num_links - 1)));
+    auto const& upper_half_of_cores = CoreRangeSet(
+        CoreRange(CoreCoord(workers_per_direction, 0), CoreCoord(num_edm_channels - 1, num_links - 1)));
     if (topology_config.ring_index == 0) {
         log_trace(tt::LogOp, "Start of line, putting CCL send cores in lower half");
         return {upper_half_of_cores, lower_half_of_cores};
@@ -348,7 +350,9 @@ static std::pair<CoreRangeSet, std::optional<CoreRangeSet>> select_worker_cores_
         return {lower_half_of_cores, upper_half_of_cores};
     } else {
         log_trace(tt::LogOp, "Middle of line - no CCL kernel");
-        return {CoreRangeSet({CoreRange(CoreCoord(0, 0), CoreCoord(num_edm_channels - 1, num_links - 1))}), std::nullopt};
+        return {
+            CoreRangeSet(CoreRange(CoreCoord(0, 0), CoreCoord(num_edm_channels - 1, num_links - 1))),
+            std::nullopt};
     }
 }
 
@@ -376,9 +380,11 @@ static std::pair<CoreRangeSet, std::optional<CoreRangeSet>> select_worker_cores(
         }
 
         case ttnn::ccl::Topology::Ring:
-            return {CoreRangeSet({CoreRange(CoreCoord(0, 0), CoreCoord(num_edm_channels - 1, num_links - 1))}), std::nullopt};
+            return {
+                CoreRangeSet(CoreRange(CoreCoord(0, 0), CoreCoord(num_edm_channels - 1, num_links - 1))),
+                std::nullopt};
 
-        default: TT_ASSERT(false, "Unsupported topology"); return {CoreRangeSet({}), std::nullopt};
+        default: TT_ASSERT(false, "Unsupported topology"); return {CoreRangeSet(), std::nullopt};
     };
 }
 
diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
index 2c461b2f580..b32bea449e2 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
@@ -94,7 +94,7 @@ ParallelConfig determine_parallel_config(
     auto grid_size = device->compute_with_storage_grid_size();
     uint32_t max_num_cores = grid_size.x * grid_size.y;
     uint32_t num_cores_nhw = 0;
-    CoreRangeSet grid = {{}};
+    CoreRangeSet grid;
     if (shard_layout == TensorMemoryLayout::HEIGHT_SHARDED) {
         num_cores_nhw = find_closest_largest_divisor(out_nhw_ntiles, max_num_cores);
         if (num_cores_nhw < grid_size.x && out_nhw_ntiles > grid_size.x) {
diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
index 692e015c546..6d68ddecc5c 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
@@ -24,7 +24,7 @@ namespace optimized_conv_op_utils {
 using namespace tt;
 using namespace tt::tt_metal;
 
-pair<vector<uint32_t>, vector<uint32_t>> compute_opt_conv_activation_as_mm_shape(const tt::tt_metal::LegacyShape& conv_activation_shape, ttnn::operations::sliding_window::SlidingWindowConfig sliding_window_config, uint32_t act_block_h_ntiles) {
+std::pair<vector<uint32_t>, vector<uint32_t>> compute_opt_conv_activation_as_mm_shape(const tt::tt_metal::LegacyShape& conv_activation_shape, ttnn::operations::sliding_window::SlidingWindowConfig sliding_window_config, uint32_t act_block_h_ntiles) {
 
     uint32_t filter_h = (uint32_t)sliding_window_config.window_hw.first;  // filter_h
     uint32_t filter_w = (uint32_t)sliding_window_config.window_hw.second;  // filter_W
@@ -190,7 +190,8 @@ std::vector<Tensor> OptimizedConvNew::create_output_tensors(const std::vector<Te
             log_debug(tt::LogOp, "Parallelization config grid size: {}", this->parallelization_config.grid_size.str());
             uint32_t num_cores_x = this->parallelization_config.grid_size.x;
             uint32_t num_cores_y = this->parallelization_config.grid_size.y;
-            CoreRangeSet shard_grid = CoreRangeSet({{{0, 0}, {num_cores_x - 1, num_cores_y - 1}}});
+            CoreRangeSet shard_grid =
+                CoreRangeSet(CoreRange({0, 0}, {num_cores_x - 1, num_cores_y - 1}));
             log_debug(tt::LogOp, "Calculated shard_grid: {}", shard_grid.str());
             std::array<uint32_t, 2> shard_shape = {this->parallelization_config.per_core_out_matrix_height_ntiles * TILE_HEIGHT, this->parallelization_config.per_core_out_matrix_width_ntiles * TILE_WIDTH};
             auto shard_spec = ShardSpec{shard_grid, shard_shape, this->memory_config.shard_spec.value().orientation};
diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp
index cf938e1da13..a22885832a8 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp
@@ -172,6 +172,6 @@ using namespace tt;
 using namespace tt::tt_metal;
 
 
-pair<vector<uint32_t>, vector<uint32_t>> compute_opt_conv_activation_as_mm_shape(const tt::tt_metal::LegacyShape& conv_activation_shape, ttnn::operations::sliding_window::SlidingWindowConfig sliding_window_config, uint32_t act_block_h_ntiles);
+std::pair<vector<uint32_t>, vector<uint32_t>> compute_opt_conv_activation_as_mm_shape(const tt::tt_metal::LegacyShape& conv_activation_shape, ttnn::operations::sliding_window::SlidingWindowConfig sliding_window_config, uint32_t act_block_h_ntiles);
 
 } // optimized_conv_op_utils
diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp
index a8a7b9e4714..d60aaf02115 100644
--- a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp
@@ -862,7 +862,7 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_impl(
     auto bottom_right_core_physical = device->worker_core_from_logical_core(bottom_right_core);
 
     CoreRange mcast_sender_cores(top_left_core, top_left_core);  // If single core, this kernel doesn't do mcasting
-    CoreRangeSet mcast_receiver_cores{{}};
+    CoreRangeSet mcast_receiver_cores;
     uint32_t weights_mcast_sender_semaphore_id{};
     uint32_t weights_mcast_receiver_semaphore_id{};
     uint32_t act_mcast_sender_semaphore_id = 0;
@@ -872,10 +872,10 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_impl(
         // 2D mcast
         if (transpose_mcast) {
             mcast_sender_cores = CoreRange(top_left_core, CoreCoord(0, num_cores_y - 1));
-            mcast_receiver_cores = {{CoreRange(CoreCoord(1, 0), bottom_right_core)}};
+            mcast_receiver_cores = CoreRange(CoreCoord(1, 0), bottom_right_core);
         } else {
             mcast_sender_cores = CoreRange(top_left_core, CoreCoord(num_cores_x - 1, 0));
-            mcast_receiver_cores = {{CoreRange(CoreCoord(0, 1), bottom_right_core)}};
+            mcast_receiver_cores = CoreRange(CoreCoord(0, 1), bottom_right_core);
         }
         weights_mcast_sender_semaphore_id = tt_metal::CreateSemaphore(program, all_cores, INVALID);
         weights_mcast_receiver_semaphore_id = tt_metal::CreateSemaphore(program, all_cores, INVALID);
diff --git a/ttnn/cpp/ttnn/operations/data_movement/bcast/device/bcast_device_operation.cpp b/ttnn/cpp/ttnn/operations/data_movement/bcast/device/bcast_device_operation.cpp
index 2f890e9bb71..3c38f88ed64 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/bcast/device/bcast_device_operation.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/bcast/device/bcast_device_operation.cpp
@@ -124,7 +124,7 @@ std::vector<Tensor> EltwiseBinaryBroadcast::create_output_tensors(const std::vec
     }
     const auto& input_tensor = input_tensors.at(0);
     if (this->output_mem_config.is_sharded()) {
-        ShardSpec shard_spec{CoreRangeSet({}), {0, 0}};
+        ShardSpec shard_spec{CoreRangeSet(), {0, 0}};
         if (input_tensor.memory_config().is_sharded()) {
             // Derive output shard_spec based on input
             shard_spec = input_tensor.shard_spec().value();
diff --git a/ttnn/cpp/ttnn/operations/data_movement/bcast/device/multi_core_hw/bcast_op_multi_core_hw.cpp b/ttnn/cpp/ttnn/operations/data_movement/bcast/device/multi_core_hw/bcast_op_multi_core_hw.cpp
index aac9e4d8e4c..7b684507703 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/bcast/device/multi_core_hw/bcast_op_multi_core_hw.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/bcast/device/multi_core_hw/bcast_op_multi_core_hw.cpp
@@ -81,7 +81,7 @@ operation::ProgramWithCallbacks bcast_multi_core_hw(const Tensor &a, const Tenso
         num_tiles_per_core_group_2 = 0;
         all_cores = shard_spec.value().grid;
         core_group_1 = all_cores;
-        core_group_2 = CoreRangeSet({});
+        core_group_2 = CoreRangeSet();
     }
 
     uint32_t num_input_tiles_cb0 = src0_sharded ? num_tiles_per_shard : num_input_tiles;
@@ -270,7 +270,7 @@ operation::ProgramWithCallbacks bcast_multi_core_hw(const Tensor &a, const Tenso
             num_tiles_per_core_group_2 = 0;
             all_cores = shard_spec.value().grid;
             core_group_1 = all_cores;
-            core_group_2 = CoreRangeSet({});
+            core_group_2 = CoreRangeSet();
         }
 
         auto& cached_reader_args = GetRuntimeArgs(program, binary_reader_kernel_id);
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded.cpp
index 8d2a175fd6e..346e55d3820 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded.cpp
@@ -41,7 +41,7 @@ ttnn::Tensor InterleavedToShardedOperation::invoke(
 
     bool row_wise = shard_orientation == ShardOrientation::ROW_MAJOR;
     CoreCoord grid_size;
-    CoreRangeSet grid_set({});
+    CoreRangeSet grid_set;
     std::visit(
         [&](const auto &grid) {
             using GridType = std::decay_t<decltype(grid)>;
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded_partial/interleaved_to_sharded_partial/interleaved_to_sharded_partial.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded_partial/interleaved_to_sharded_partial/interleaved_to_sharded_partial.cpp
index 5ad85a81e03..297f7872d48 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded_partial/interleaved_to_sharded_partial/interleaved_to_sharded_partial.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded_partial/interleaved_to_sharded_partial/interleaved_to_sharded_partial.cpp
@@ -29,7 +29,7 @@ ttnn::Tensor InterleavedToShardedPartialOperation::invoke(
 
     bool row_wise = shard_orientation == ShardOrientation::ROW_MAJOR;
     CoreCoord grid_size;
-    CoreRangeSet grid_set({});
+    CoreRangeSet grid_set;
     std::visit(
         [&](const auto &grid) {
             using GridType = std::decay_t<decltype(grid)>;
diff --git a/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_program_factory.cpp b/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_program_factory.cpp
index 1570010fae0..2d9c90c2eb8 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/untilize/device/untilize_program_factory.cpp
@@ -328,7 +328,7 @@ operation::ProgramWithCallbacks untilize_multi_core(
         uint32_t num_cores = all_cores.num_cores();
         ncores = num_cores;
         core_range = all_cores;
-        core_range_cliff = CoreRangeSet({});
+        core_range_cliff = CoreRangeSet();
         ntiles_per_block = shard_spec.shape[1] / TILE_WIDTH;
         nblocks_per_core = shard_spec.shape[0] / TILE_HEIGHT;
         nblocks_per_core_cliff = 0;
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp
index 5ff657eaf8c..e42f7e72d70 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/binary_device_operation.cpp
@@ -204,7 +204,7 @@ BinaryDeviceOperation::tensor_return_value_t BinaryDeviceOperation::create_outpu
     auto program_factory = select_program_factory(operation_attributes, tensor_args);
     if (std::holds_alternative<ElementWiseMultiCore>(program_factory)) {
         if (operation_attributes.memory_config.is_sharded()) {
-            ShardSpec shard_spec{CoreRangeSet({}), {0, 0}};
+            ShardSpec shard_spec{CoreRangeSet(), {0, 0}};
             if (input_tensor_a.memory_config().is_sharded()) {
                 shard_spec = input_tensor_a.shard_spec().value();
             } else if (input_tensor_b.memory_config().is_sharded()) {
@@ -219,7 +219,7 @@ BinaryDeviceOperation::tensor_return_value_t BinaryDeviceOperation::create_outpu
         }
     } else {
         if (operation_attributes.memory_config.is_sharded()) {
-            ShardSpec shard_spec{CoreRangeSet({}), {0, 0}};
+            ShardSpec shard_spec{CoreRangeSet(), {0, 0}};
             if (input_tensor_a.memory_config().is_sharded()) {
                 // Derive output shard_spec based on input
                 shard_spec = input_tensor_a.shard_spec().value();
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp
index b94004b4dd7..a964c226bd6 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/broadcast_height_and_width_multi_core_program_factory.cpp
@@ -111,7 +111,7 @@ BinaryDeviceOperation::BroadcastHeightAndWidthMultiCore::create(
         num_tiles_per_core_group_2 = 0;
         all_cores = shard_spec.value().grid;
         core_group_1 = all_cores;
-        core_group_2 = CoreRangeSet({});
+        core_group_2 = CoreRangeSet();
     }
 
     uint32_t num_input_tiles_cb0 = src0_sharded ? num_tiles_per_shard : num_input_tiles;
@@ -319,7 +319,7 @@ void BinaryDeviceOperation::BroadcastHeightAndWidthMultiCore::override_runtime_a
         num_tiles_per_core_group_2 = 0;
         all_cores = shard_spec.value().grid;
         core_group_1 = all_cores;
-        core_group_2 = CoreRangeSet({});
+        core_group_2 = CoreRangeSet();
     }
 
     auto& cached_reader_args = GetRuntimeArgs(program, binary_reader_kernel_id);
diff --git a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp
index 40ad1a58097..eb57eb345b9 100644
--- a/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/eltwise/binary/device/element_wise_multi_core_program_factory.cpp
@@ -39,7 +39,7 @@ inline __attribute__((always_inline)) void set_eltwise_binary_runtime_args(
     auto src_buffer_b = b.buffer();
     auto dst_buffer = output.buffer();
 
-    CoreRangeSet all_cores({}), core_group_1({}), core_group_2({});
+    CoreRangeSet all_cores, core_group_1, core_group_2;
 
     std::optional<ShardSpec> shard_spec = std::nullopt;
     std::optional<TensorMemoryLayout> sharded_layout = std::nullopt;
@@ -83,7 +83,7 @@ inline __attribute__((always_inline)) void set_eltwise_binary_runtime_args(
         all_cores = shard_spec.value().grid;
         num_cores = all_cores.num_cores();
         core_group_1 = all_cores;
-        core_group_2 = CoreRangeSet({});
+        core_group_2 = CoreRangeSet();
         num_tiles_per_core_group_1 = shard_spec.value().shape[0] * shard_spec.value().shape[1] / TILE_HW;
         num_tiles_per_core_group_2 = 0;
         block_size_per_core_group_1 = find_max_block_size(num_tiles_per_core_group_1);
diff --git a/ttnn/cpp/ttnn/operations/experimental/paged_cache/device/paged_fill_cache_program_factory.cpp b/ttnn/cpp/ttnn/operations/experimental/paged_cache/device/paged_fill_cache_program_factory.cpp
index 7a39357cfb5..bf65a8965b3 100644
--- a/ttnn/cpp/ttnn/operations/experimental/paged_cache/device/paged_fill_cache_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/experimental/paged_cache/device/paged_fill_cache_program_factory.cpp
@@ -57,7 +57,7 @@ operation::ProgramWithCallbacks paged_fill_cache_multi_core(const Tensor& cache_
     bool row_major;
     uint32_t num_cores, num_blocks_per_core_group_1, num_blocks_per_core_group_2;
 
-    CoreRangeSet all_cores({}), core_group_1({}), core_group_2({});
+    CoreRangeSet all_cores, core_group_1, core_group_2;
 
     row_major = true;
     std::tie(num_cores, all_cores, core_group_1, core_group_2, num_blocks_per_core_group_1, num_blocks_per_core_group_2) = tt::tt_metal::split_work_to_cores(compute_with_storage_grid_size,  num_blocks_of_work, row_major);
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/nlp_concat_heads/device/nlp_concat_heads_program_factory.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/nlp_concat_heads/device/nlp_concat_heads_program_factory.cpp
index 8e63c55b980..4f83def6d87 100644
--- a/ttnn/cpp/ttnn/operations/experimental/transformer/nlp_concat_heads/device/nlp_concat_heads_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/nlp_concat_heads/device/nlp_concat_heads_program_factory.cpp
@@ -45,7 +45,7 @@ operation::ProgramWithCallbacks multi_core_nlp_concat_heads(const Tensor &a, Ten
     // Block is a unit of work; ie. num of per_tensor_tiles per core
     uint32_t num_blocks = ashape[0] * ashape[2] / TILE_HEIGHT;
     uint32_t num_cores = 0, num_blocks_per_core_group_1 = 0, num_blocks_per_core_group_2 = 0;
-    CoreRangeSet all_cores = CoreRangeSet({}), core_group_1 = CoreRangeSet({}), core_group_2 = CoreRangeSet({});
+    CoreRangeSet all_cores = CoreRangeSet(), core_group_1 = CoreRangeSet(), core_group_2 = CoreRangeSet();
     bool row_major = false;
     if (in_sharded) {
         all_cores = a.shard_spec().value().grid;
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_device_operation.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_device_operation.cpp
index 1fa6578efcc..c174d458fdf 100644
--- a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_device_operation.cpp
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_device_operation.cpp
@@ -72,7 +72,7 @@ std::vector<Tensor> RotaryEmbedding::create_output_tensors(const std::vector<Ten
     const auto& input_tensor = input_tensors.at(0);
     auto output_shape = this->compute_output_shapes(input_tensors)[0];
     if (this->output_mem_config.is_sharded()) {
-        ShardSpec shard_spec{CoreRangeSet({}), {0, 0}};
+        ShardSpec shard_spec{CoreRangeSet(), {0, 0}};
         if (input_tensor.is_sharded()) {
             shard_spec = input_tensor.shard_spec().value();
         } else {
diff --git a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_program_factory.cpp b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_program_factory.cpp
index faeb4eefe39..f9ed0492f56 100644
--- a/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/experimental/transformer/rotary_embedding/device/rotary_embedding_program_factory.cpp
@@ -64,7 +64,7 @@ operation::ProgramWithCallbacks rotary_embedding_multi_core(
     bool row_major;
     uint32_t num_cores, num_rows_per_core_group_1, num_rows_per_core_group_2;
 
-    CoreRangeSet all_cores({}), core_group_1({}), core_group_2({});
+    CoreRangeSet all_cores, core_group_1, core_group_2;
 
     bool in_sharded = input.shard_spec().has_value();
     bool out_sharded = output.shard_spec().has_value();
@@ -77,7 +77,7 @@ operation::ProgramWithCallbacks rotary_embedding_multi_core(
         all_cores = shard_spec.value().grid;
         num_cores = all_cores.num_cores();
         core_group_1 = all_cores;
-        core_group_2 = CoreRangeSet({});
+        core_group_2 = CoreRangeSet();
         num_rows_per_core_group_1 = shard_spec.value().shape[0] / TILE_HEIGHT;
         num_rows_per_core_group_2 = 0;
         num_input_tiles =
diff --git a/ttnn/cpp/ttnn/operations/kv_cache/device/update_cache_op_multi_core.cpp b/ttnn/cpp/ttnn/operations/kv_cache/device/update_cache_op_multi_core.cpp
index 95aaf14358a..4e7719e53ef 100644
--- a/ttnn/cpp/ttnn/operations/kv_cache/device/update_cache_op_multi_core.cpp
+++ b/ttnn/cpp/ttnn/operations/kv_cache/device/update_cache_op_multi_core.cpp
@@ -63,7 +63,7 @@ operation::ProgramWithCallbacks update_cache_multi_core(const Tensor& cache_tens
     bool row_major;
     uint32_t num_cores, num_batched_heads_per_core_group_1, num_batched_heads_per_core_group_2;
 
-    CoreRangeSet all_cores({}), core_group_1({}), core_group_2({});
+    CoreRangeSet all_cores, core_group_1, core_group_2;
 
     std::optional<ShardSpec> shard_spec = input_tensor.shard_spec();
 
@@ -73,7 +73,7 @@ operation::ProgramWithCallbacks update_cache_multi_core(const Tensor& cache_tens
         all_cores = shard_spec.value().grid;
         num_cores = all_cores.num_cores();
         core_group_1 = all_cores;
-        core_group_2 = CoreRangeSet({});
+        core_group_2 = CoreRangeSet();
         num_batched_heads_per_core_group_1 = shard_spec.value().shape[0] / TILE_HEIGHT;
         num_batched_heads_per_core_group_2 = 0;
         num_input_tiles = shard_spec.value().shape[0] * shard_spec.value().shape[1] / TILE_HW;
@@ -324,7 +324,7 @@ operation::ProgramWithCallbacks fill_cache_multi_core(const Tensor& cache_tensor
     bool row_major;
     uint32_t num_cores, num_blocks_per_core_group_1, num_blocks_per_core_group_2;
 
-    CoreRangeSet all_cores({}), core_group_1({}), core_group_2({});
+    CoreRangeSet all_cores, core_group_1, core_group_2;
 
     std::optional<ShardSpec> shard_spec = input_tensor.shard_spec();
 
@@ -334,7 +334,7 @@ operation::ProgramWithCallbacks fill_cache_multi_core(const Tensor& cache_tensor
         all_cores = shard_spec.value().grid;
         num_cores = all_cores.num_cores();
         core_group_1 = all_cores;
-        core_group_2 = CoreRangeSet({});
+        core_group_2 = CoreRangeSet();
         num_blocks_per_core_group_1 = shard_spec.value().shape[0] / TILE_HEIGHT;
         num_blocks_per_core_group_2 = 0;
         num_input_tiles = shard_spec.value().shape[0] * shard_spec.value().shape[1] / TILE_HW;
diff --git a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp
index 8414733611b..764ab25f3be 100644
--- a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp
+++ b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp
@@ -1421,7 +1421,7 @@ std::vector<Tensor> Matmul::create_output_tensors(const std::vector<Tensor>& inp
                     uint32_t num_blocks_x = (N - 1) / per_core_N + 1;
                     uint32_t num_blocks_total = num_blocks_y * num_blocks_x;
                     uint32_t num_cores = num_blocks_x * num_blocks_y;
-                    CoreRangeSet all_cores({});
+                    CoreRangeSet all_cores;
                     ShardOrientation shard_orientation;
                     if (program_config.transpose_mcast) {
                         all_cores = CoreRangeSet({CoreRange({0, 0}, {num_blocks_y - 1, num_blocks_x - 1})});
diff --git a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_1d_program_factory.cpp b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_1d_program_factory.cpp
index 23db6670f04..a292a31be18 100644
--- a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_1d_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_1d_program_factory.cpp
@@ -146,10 +146,10 @@ operation::ProgramWithCallbacks create_program_mcast_in0(
         in0_mcast_receiver_num_cores,
         num_cores);  // should always be number of cores in receiver grid up to number of active cores
 
-    CoreRangeSet in0_mcast_cores_with_work_and_in_receiver_grid({});
-    CoreRangeSet in0_mcast_cores_without_work_and_in_receiver_grid({});
-    CoreRangeSet in0_mcast_cores_without_work_and_not_in_receiver_grid({});
-    CoreRangeSet in0_mcast_receivers({});
+    CoreRangeSet in0_mcast_cores_with_work_and_in_receiver_grid;
+    CoreRangeSet in0_mcast_cores_without_work_and_in_receiver_grid;
+    CoreRangeSet in0_mcast_cores_without_work_and_not_in_receiver_grid;
+    CoreRangeSet in0_mcast_receivers;
     std::vector<uint32_t> in0_mcast_noc_x;
     std::vector<uint32_t> in0_mcast_noc_y;
     if (in0_is_sharded) {
@@ -987,7 +987,7 @@ operation::ProgramWithCallbacks create_program_mcast_in1(
     uint32_t in1_mcast_receiver_num_cores = in1_mcast_receiver_cores_bounding_box.size();  // always mcast to full grid
 
     CoreRange in1_mcast_sender(start_core, start_core);
-    CoreRangeSet in1_mcast_receivers({});
+    CoreRangeSet in1_mcast_receivers;
     if (in1_mcast_receiver_num_cores > 1) {
         auto receiver_start_core = start_core.x != (compute_with_storage_grid_size.x - 1)
                                        ? CoreCoord{start_core.x + 1, start_core.y}
diff --git a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_dram_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_dram_sharded_program_factory.cpp
index 240f9aff9a0..96d914fbff2 100644
--- a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_dram_sharded_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_mcast_dram_sharded_program_factory.cpp
@@ -459,7 +459,7 @@ operation::ProgramWithCallbacks create_program_dram_sharded(
     tt_metal::Program program{};
 
     // get the dram readers
-    CoreRangeSet all_worker_cores = CoreRangeSet{{}};
+    CoreRangeSet all_worker_cores;
     std::vector<CoreCoord> all_worker_cores_ordered;
 
     if (device->arch() == tt::ARCH::WORMHOLE_B0) {
@@ -1031,7 +1031,8 @@ operation::ProgramWithCallbacks create_program_dram_sharded(
     for (uint32_t i = 0; i < all_cores_in_rect_grid_vec.size(); ++i) {
         auto core = all_cores_in_rect_grid_vec[i];
 
-        if (all_worker_cores.ranges().find(core) == all_worker_cores.ranges().end()) {  // not worker
+        if (std::find(all_worker_cores.ranges().begin(), all_worker_cores.ranges().end(), core) ==
+            all_worker_cores.ranges().end()) {  // not worker
             // in1 reader rt args
             bool is_worker_core = false;
             std::vector<uint32_t> mm_in1_sender_writer_args;
diff --git a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_optimized_program_factory.cpp b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_optimized_program_factory.cpp
index 09e649bb368..4a6fd50be09 100644
--- a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_optimized_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op_multi_core_reuse_optimized_program_factory.cpp
@@ -123,7 +123,7 @@ operation::ProgramWithCallbacks create_program(
     }
 
     uint32_t num_cores = 0, num_blocks_per_core_group_1 = 0, num_blocks_per_core_group_2 = 0;
-    CoreRangeSet all_cores({}), core_group_1({}), core_group_2({});
+    CoreRangeSet all_cores, core_group_1, core_group_2;
 
     if (shard_spec.has_value()) {
         all_cores = shard_spec.value().grid;
diff --git a/ttnn/cpp/ttnn/operations/normalization/layernorm/device/multi_core/layernorm_op_multi_core.cpp b/ttnn/cpp/ttnn/operations/normalization/layernorm/device/multi_core/layernorm_op_multi_core.cpp
index cf5db0c6d72..392542fdaad 100644
--- a/ttnn/cpp/ttnn/operations/normalization/layernorm/device/multi_core/layernorm_op_multi_core.cpp
+++ b/ttnn/cpp/ttnn/operations/normalization/layernorm/device/multi_core/layernorm_op_multi_core.cpp
@@ -604,9 +604,9 @@ operation::ProgramWithCallbacks layernorm_multi_core_sharded(
     CoreCoord start_core = {0, 0};
     CoreRangeSet all_cores = shard_spec.grid;
     CoreRange sender_cores(start_core, start_core);
-    CoreRangeSet all_to_all_cores({});
-    CoreRangeSet all_to_all_workers_except_sender({});
-    CoreRangeSet not_all_to_all_workers({});
+    CoreRangeSet all_to_all_cores;
+    CoreRangeSet all_to_all_workers_except_sender;
+    CoreRangeSet not_all_to_all_workers;
     uint32_t num_cores_x_mcast, num_cores_y_mcast;
     if (mcast_1d) {
         sender_cores = {start_core, start_core};
@@ -707,18 +707,19 @@ operation::ProgramWithCallbacks layernorm_multi_core_sharded(
             sender_cores = {
                 {(std::size_t) start_core.x, (std::size_t) start_core.y},
                 {(std::size_t) start_core.x, (std::size_t) start_core.y + num_cores_y - 1}};
-            all_to_all_cores = CoreRangeSet({CoreRange(
-                {(std::size_t) start_core.x, (std::size_t) start_core.y},
-                {(std::size_t) start_core.x + num_cores_all_to_all - 1, (std::size_t) start_core.y + num_cores_y - 1})});
+            all_to_all_cores = CoreRangeSet(CoreRange(
+                {(std::size_t)start_core.x, (std::size_t)start_core.y},
+                {(std::size_t)start_core.x + num_cores_all_to_all - 1, (std::size_t)start_core.y + num_cores_y - 1}));
             if (use_mcast && num_cores_all_to_all > 1) {
-                all_to_all_workers_except_sender = CoreRangeSet({CoreRange(
-                    {(std::size_t) start_core.x + 1, (std::size_t) start_core.y},
-                    {(std::size_t) start_core.x + num_cores_all_to_all - 1, (std::size_t) start_core.y + num_cores_y - 1})});
+                all_to_all_workers_except_sender = CoreRangeSet(CoreRange(
+                    {(std::size_t)start_core.x + 1, (std::size_t)start_core.y},
+                    {(std::size_t)start_core.x + num_cores_all_to_all - 1,
+                     (std::size_t)start_core.y + num_cores_y - 1}));
             }
             if (num_none_all_to_all_workers > 0) {
-                not_all_to_all_workers = CoreRangeSet({CoreRange(
-                    {(std::size_t) start_core.x + num_cores_all_to_all, (std::size_t) start_core.y},
-                    {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y + num_cores_y - 1})});
+                not_all_to_all_workers = CoreRangeSet(CoreRange(
+                    {(std::size_t)start_core.x + num_cores_all_to_all, (std::size_t)start_core.y},
+                    {(std::size_t)start_core.x + num_cores_x - 1, (std::size_t)start_core.y + num_cores_y - 1}));
             }
             num_cores_x_mcast = num_cores_x;
             num_cores_y_mcast = 1;
@@ -726,18 +727,19 @@ operation::ProgramWithCallbacks layernorm_multi_core_sharded(
             sender_cores = {
                 {(std::size_t) start_core.x, (std::size_t) start_core.y},
                 {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y}};
-            all_to_all_cores = CoreRangeSet({CoreRange(
-                {(std::size_t) start_core.x, (std::size_t) start_core.y},
-                {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y + num_cores_all_to_all - 1})});
+            all_to_all_cores = CoreRangeSet(CoreRange(
+                {(std::size_t)start_core.x, (std::size_t)start_core.y},
+                {(std::size_t)start_core.x + num_cores_x - 1, (std::size_t)start_core.y + num_cores_all_to_all - 1}));
             if (use_mcast && num_cores_all_to_all > 1) {
-                all_to_all_workers_except_sender = CoreRangeSet({CoreRange(
-                    {(std::size_t) start_core.x, (std::size_t) start_core.y + 1},
-                    {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y + num_cores_all_to_all - 1})});
+                all_to_all_workers_except_sender = CoreRangeSet(CoreRange(
+                    {(std::size_t)start_core.x, (std::size_t)start_core.y + 1},
+                    {(std::size_t)start_core.x + num_cores_x - 1,
+                     (std::size_t)start_core.y + num_cores_all_to_all - 1}));
             }
             if (num_none_all_to_all_workers > 0) {
-                not_all_to_all_workers = CoreRangeSet({CoreRange(
-                    {(std::size_t) start_core.x, (std::size_t) start_core.y + num_cores_all_to_all},
-                    {(std::size_t) start_core.x + num_cores_x - 1, (std::size_t) start_core.y + num_cores_y - 1})});
+                not_all_to_all_workers = CoreRangeSet(CoreRange(
+                    {(std::size_t)start_core.x, (std::size_t)start_core.y + num_cores_all_to_all},
+                    {(std::size_t)start_core.x + num_cores_x - 1, (std::size_t)start_core.y + num_cores_y - 1}));
             }
             num_cores_x_mcast = 1;
             num_cores_y_mcast = num_cores_y;
diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool2d_multi_core_program_factory.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool2d_multi_core_program_factory.cpp
index 8e035fbe341..7c176dbf42f 100644
--- a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool2d_multi_core_program_factory.cpp
+++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool2d_multi_core_program_factory.cpp
@@ -88,7 +88,7 @@ MaxPool2D::MultiCore::cached_program_t max_pool_2d_multi_core_sharded_with_halo_
     auto all_cores = input.shard_spec().value().grid;
     uint32_t ncores = all_cores.num_cores();
     auto core_range = all_cores;
-    auto core_range_cliff = CoreRangeSet({});
+    auto core_range_cliff = CoreRangeSet();
     uint32_t in_nhw_per_core = input.shard_spec()->shape[0];
     uint32_t in_nhw_per_core_cliff = 0;
     uint32_t out_nhw_per_core = output.shard_spec()->shape[0];
diff --git a/ttnn/cpp/ttnn/operations/reduction/generic/device/multi_core_h/reduce_op_multi_core_h.cpp b/ttnn/cpp/ttnn/operations/reduction/generic/device/multi_core_h/reduce_op_multi_core_h.cpp
index 3ad22e3eef1..1a5fd46bce4 100644
--- a/ttnn/cpp/ttnn/operations/reduction/generic/device/multi_core_h/reduce_op_multi_core_h.cpp
+++ b/ttnn/cpp/ttnn/operations/reduction/generic/device/multi_core_h/reduce_op_multi_core_h.cpp
@@ -57,7 +57,7 @@ operation::ProgramWithCallbacks reduce_multi_core_h(
         all_cores = a.shard_spec().value().grid;
         num_cores = all_cores.num_cores();
         core_group_1 = all_cores;
-        core_group_2 = CoreRangeSet({});
+        core_group_2 = CoreRangeSet();
         num_cols_per_core_group_1 = NC * (a.shard_spec().value().shape[1] / TILE_WIDTH);
         num_cols_per_core_group_2 = 0;
     }
diff --git a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp
index fd58677d454..5a55ebbd0c5 100644
--- a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp
+++ b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp
@@ -13,7 +13,7 @@
 namespace ttnn::operations::sliding_window {
 
 struct ParallelConfig {
-    CoreRangeSet grid = {{}};
+    CoreRangeSet grid = {};
     TensorMemoryLayout shard_scheme;
     ShardOrientation shard_orientation;
 
@@ -46,7 +46,7 @@ struct SlidingWindowConfig {
     // parallel configuration
     uint32_t num_cores_nhw = 1;        // num cores along collapsed height nhw
     uint32_t num_cores_c = 1;          // num cores along width c
-    CoreRangeSet core_range_set = std::set{CoreRange({0, 0}, {0, 0})};   // active cores
+    CoreRangeSet core_range_set = CoreRangeSet(CoreRange({0, 0}, {0, 0}));   // active cores
 
     bool snap_to_tile = false;
     bool is_bilinear = false;