Commit 3e92356

gs resnet50 fix

Pavle Josipovic committed Nov 13, 2024
1 parent b18ae2f commit 3e92356
Showing 1 changed file with 56 additions and 8 deletions.
@@ -8,12 +8,37 @@
     is_grayskull,
     is_wormhole_b0,
-    _nearest_y,
+    nearest_y,
     pad_and_fold_conv_activation_for_unity_stride,
 )
 from typing import List
 from loguru import logger
 from tests.ttnn.utils_for_testing import assert_with_pcc


+def get_core_grid_from_num_cores(num_cores: int, grid_rows: int, grid_cols: int):
+    columns = num_cores // grid_rows
+    assert columns <= grid_cols, "Not enough cores for specified core grid"
+    ranges = []
+    if columns != 0:
+        ranges.append(
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(grid_rows - 1, columns - 1),
+            )
+        )
+    remainder = num_cores % grid_rows
+    if remainder != 0:
+        assert columns + 1 <= grid_cols, "Not enough cores for specified core grid"
+        ranges.append(
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, columns),
+                ttnn.CoreCoord(remainder - 1, columns),
+            )
+        )
+    return ttnn.CoreRangeSet({*ranges})
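+# Illustrative example (hypothetical values): assuming Grayskull's 12x9 worker
+# grid, get_core_grid_from_num_cores(98, 12, 9) computes columns = 98 // 12 = 8,
+# yielding a full (0,0)-(11,7) block of 96 cores, plus remainder = 98 % 12 = 2,
+# a (0,8)-(1,8) range of 2 cores, for 98 cores in total.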


 hardcoded_matmul_config_linear = {
     8: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
         compute_with_storage_grid_size=(8, 4),
@@ -632,15 +657,38 @@ def __init__(

         conv_dummy_tensor = torch.rand((self.fold_output_shape), dtype=torch.bfloat16)
         conv_dummy_tensor = ttnn.from_torch(conv_dummy_tensor, layout=ttnn.ROW_MAJOR_LAYOUT)
-        _, self.override_fold_mem_config, _, _ = ttnn.get_conv_padded_input_shape_and_mem_config(
-            device=device,
-            input_tensor=conv_dummy_tensor,
-            conv_config=self.conv1_config,
+        parallel_config = ttnn._ttnn.operations.conv.determine_parallel_config(
+            shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
             batch_size=self.batch_size,
-            height=self.conv1_output_height,
-            width=self.conv1_output_width,
-            in_channels=self.conv1_input_channels,
-            out_channels=self.conv1_output_channels,
+            input_channels=self.conv1_input_channels,
+            output_height=self.conv1_output_height,
+            output_width=self.conv1_output_width,
+            output_channels=self.conv1_output_channels,
+            compute_grid_size=device.compute_with_storage_grid_size(),
+            block_shard_orientation=ttnn.ShardOrientation.ROW_MAJOR,
+            is_conv2d_op=True,
+            is_out_tiled=True,
         )
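+        # parallel_config captures the core grid and sharding scheme chosen for
+        # conv1 (height-sharded across the device's compute grid here).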
+        # Override the compute grid size for Grayskull: the first conv would
+        # go to 108 cores by default, but that would add padding to the output
+        # tensor, and the reshard that follows the first conv currently fails
+        # on padded tensors.
+        if is_grayskull():
+            compute_grid = device.compute_with_storage_grid_size()
+            parallel_config.grid = get_core_grid_from_num_cores(98, compute_grid.x, compute_grid.y)
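+            # With the standard 224x224 input, conv1's flattened output has
+            # batch * 112 * 112 rows, i.e. 392 * batch tiles of height 32;
+            # since 392 = 4 * 98, sharding over 98 cores splits the rows
+            # evenly for any batch size, whereas 108 cores generally does not.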

+        self.override_fold_mem_config = ttnn._ttnn.operations.conv.create_sharded_memory_config_from_parallel_config(
+            tensor_shape=ttnn.Shape(
+                [
+                    1,
+                    1,
+                    self.conv1_input_width * self.conv1_input_height * self.batch_size,
+                    nearest_y(self.conv1_input_channels, self.conv1_config.input_channels_alignment),
+                ]
+            ),
+            parallel_config=parallel_config,
+            tile_size=32,
+        )
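+        # The shape above is the conv-flattened activation [1, 1, N*H*W, C]:
+        # batch and spatial dims are collapsed into a single row dimension,
+        # and the channel count is rounded up to the conv config's
+        # input_channels_alignment.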

     def __del__(self):
