diff --git a/models/demos/ttnn_resnet/tt/ttnn_functional_resnet50_new_conv_api.py b/models/demos/ttnn_resnet/tt/ttnn_functional_resnet50_new_conv_api.py
index 3a5c75967e9..f034f1348d5 100644
--- a/models/demos/ttnn_resnet/tt/ttnn_functional_resnet50_new_conv_api.py
+++ b/models/demos/ttnn_resnet/tt/ttnn_functional_resnet50_new_conv_api.py
@@ -8,12 +8,37 @@
     is_grayskull,
     is_wormhole_b0,
     _nearest_y,
+    nearest_y,
     pad_and_fold_conv_activation_for_unity_stride,
 )
 from typing import List
 from loguru import logger
 from tests.ttnn.utils_for_testing import assert_with_pcc
 
+
+def get_core_grid_from_num_cores(num_cores: int, grid_rows: int, grid_cols: int):
+    columns = num_cores // grid_rows
+    assert columns <= grid_cols, "Not enough cores for specified core grid"
+    ranges = []
+    if columns != 0:
+        ranges.append(
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(grid_rows - 1, columns - 1),
+            )
+        )
+    remainder = num_cores % grid_rows
+    if remainder != 0:
+        assert columns + 1 <= grid_cols, "Not enough cores for specified core grid"
+        ranges.append(
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, columns),
+                ttnn.CoreCoord(remainder - 1, columns),
+            )
+        )
+    return ttnn.CoreRangeSet({*ranges})
+
+
 hardcoded_matmul_config_linear = {
     8: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
         compute_with_storage_grid_size=(8, 4),
@@ -632,15 +657,38 @@ def __init__(
 
         conv_dummy_tensor = torch.rand((self.fold_output_shape), dtype=torch.bfloat16)
         conv_dummy_tensor = ttnn.from_torch(conv_dummy_tensor, layout=ttnn.ROW_MAJOR_LAYOUT)
-        _, self.override_fold_mem_config, _, _ = ttnn.get_conv_padded_input_shape_and_mem_config(
-            device=device,
-            input_tensor=conv_dummy_tensor,
-            conv_config=self.conv1_config,
+
+        parallel_config = ttnn._ttnn.operations.conv.determine_parallel_config(
+            shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
             batch_size=self.batch_size,
-            height=self.conv1_output_height,
-            width=self.conv1_output_width,
-            in_channels=self.conv1_input_channels,
-            out_channels=self.conv1_output_channels,
+            input_channels=self.conv1_input_channels,
+            output_height=self.conv1_output_height,
+            output_width=self.conv1_output_width,
+            output_channels=self.conv1_output_channels,
+            compute_grid_size=device.compute_with_storage_grid_size(),
+            block_shard_orientation=ttnn.ShardOrientation.ROW_MAJOR,
+            is_conv2d_op=True,
+            is_out_tiled=True,
+        )
+        # Override the compute grid size for Grayskull.
+        # The first conv would go to 108 cores by default, but that would
+        # add padding to the output tensor, and the reshard that follows
+        # the first conv currently fails on padded tensors.
+        if is_grayskull():
+            compute_grid = device.compute_with_storage_grid_size()
+            parallel_config.grid = get_core_grid_from_num_cores(98, compute_grid.x, compute_grid.y)
+
+        self.override_fold_mem_config = ttnn._ttnn.operations.conv.create_sharded_memory_config_from_parallel_config(
+            tensor_shape=ttnn.Shape(
+                [
+                    1,
+                    1,
+                    self.conv1_input_width * self.conv1_input_height * self.batch_size,
+                    nearest_y(self.conv1_input_channels, self.conv1_config.input_channels_alignment),
+                ]
+            ),
+            parallel_config=parallel_config,
+            tile_size=32,
         )
 
     def __del__(self):
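
Note on the Grayskull override: `get_core_grid_from_num_cores` covers the requested core count with full rows of the compute grid plus one partial row (note that the `grid_rows` argument receives the grid's x-extent at the call site). Below is a minimal, device-free sketch of the same arithmetic, assuming Grayskull's 12x9 compute grid; `core_ranges_from_num_cores` and the plain-tuple representation are illustrative stand-ins for `ttnn.CoreRange`/`ttnn.CoreCoord`, not part of the patch.

```python
# Standalone sketch of the core-splitting arithmetic used by
# get_core_grid_from_num_cores in the diff above. Plain (x, y) tuples
# replace ttnn.CoreRange/ttnn.CoreCoord so it runs without a device;
# the 12x9 Grayskull grid below is an assumption for illustration.

def core_ranges_from_num_cores(num_cores: int, grid_x: int, grid_y: int):
    """Return [(start_xy, end_xy)] rectangles covering num_cores cores:
    full rows of grid_x cores first, then one partial row for the rest."""
    full_rows = num_cores // grid_x
    assert full_rows <= grid_y, "Not enough cores for specified core grid"
    ranges = []
    if full_rows:
        # One rectangle spanning all full rows.
        ranges.append(((0, 0), (grid_x - 1, full_rows - 1)))
    remainder = num_cores % grid_x
    if remainder:
        assert full_rows + 1 <= grid_y, "Not enough cores for specified core grid"
        # A single partial row holding the leftover cores.
        ranges.append(((0, full_rows), (remainder - 1, full_rows)))
    return ranges

# 98 cores on a 12x9 grid -> eight full rows (96 cores) plus 2 extra cores.
print(core_ranges_from_num_cores(98, 12, 9))
# [((0, 0), (11, 7)), ((0, 8), (1, 8))]
```

Capping conv1 at 98 cores instead of the 108-core default keeps the conv output unpadded, which is what lets the reshard that follows the first conv succeed.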