From 2bf36edeff06dc1690c9fe756dec11b848f73df8 Mon Sep 17 00:00:00 2001 From: Pavle Josipovic Date: Wed, 13 Nov 2024 17:00:50 +0000 Subject: [PATCH] get yolo to work --- .../conv2d_op_sharded_program_factory.cpp | 20 +++++++++---------- .../operations/matmul/device/matmul_op.cpp | 3 --- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp index cffa1308549..2171e966244 100644 --- a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp @@ -845,10 +845,10 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_impl( total_active_num_cores_per_weight_slice = act_matrix_height_ntiles / per_core_out_matrix_height_ntiles; } TT_FATAL(total_active_num_cores_per_weight_slice <= total_num_cores_per_weight_slice, "Error"); - uint32_t total_noop_cores = total_num_cores_per_weight_slice - total_active_num_cores_per_weight_slice; + //uint32_t total_noop_cores = total_num_cores_per_weight_slice - total_active_num_cores_per_weight_slice; uint32_t total_active_num_cores = total_active_num_cores_per_weight_slice * num_weight_slices_width; if (weight_width_sliced) { - TT_FATAL(total_noop_cores == 0, "Error"); + //TT_FATAL(total_noop_cores == 0, "Error"); TT_FATAL(total_active_num_cores == total_num_cores, "Error"); } @@ -874,14 +874,14 @@ operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_impl( CoreCoord(num_active_cores_x_last_y - 1, num_active_cores_y_with_full_x))); } CoreRangeSet all_active_cores(all_active_cores_set); - std::set noop_cores_set; - if (total_noop_cores > 0) { - TT_FATAL(total_noop_cores == num_cores_x - num_active_cores_x_last_y, "Expected total_noop_cores {} to be equal to num_cores_x {} - num_active_cores_x_last_y {}", total_noop_cores, num_cores_x, num_active_cores_x_last_y); - noop_cores_set.insert(CoreRange( - CoreCoord(num_active_cores_x_last_y, num_active_cores_y_with_full_x), - CoreCoord(num_cores_x - 1, num_active_cores_y_with_full_x))); - } - CoreRangeSet noop_cores(noop_cores_set); + // std::set noop_cores_set; + // if (total_noop_cores > 0) { + // //TT_FATAL(total_noop_cores == num_cores_x - num_active_cores_x_last_y, "Expected total_noop_cores {} to be equal to num_cores_x {} - num_active_cores_x_last_y {}", total_noop_cores, num_cores_x, num_active_cores_x_last_y); + // noop_cores_set.insert(CoreRange( + // CoreCoord(num_active_cores_x_last_y, num_active_cores_y_with_full_x), + // CoreCoord(num_cores_x - 1, num_active_cores_y_with_full_x))); + // } + // CoreRangeSet noop_cores(noop_cores_set); // Mcast cores // If total_num_cores, there is no mcasting diff --git a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp index 509e46d89e5..7baaf1e77b4 100644 --- a/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp +++ b/ttnn/cpp/ttnn/operations/matmul/device/matmul_op.cpp @@ -1138,9 +1138,6 @@ void Matmul::validate( uint32_t K = input_tensor_a.get_legacy_shape()[-1] / in0_tile_shape[1]; uint32_t per_core_M = program_config.per_core_M; auto shard_shape = input_tensor_a.shard_spec().value().shape; - - log_info( - tt::LogOp, "M: {}, K: {}, per_core_M: {}, shard_shape: {}", M, K, per_core_M, shard_shape); TT_FATAL(div_up(M, per_core_M) <= input_tensor_a.shard_spec().value().grid.num_cores(), "Error"); TT_FATAL(per_core_M == (shard_shape[0] / in0_tile_shape[0]), "Error"); TT_FATAL(K % program_config.in0_block_w == 0, "Error");