Skip to content

Commit

Permalink
#4730: Add sweep test for ttnn.concat
Browse files Browse the repository at this point in the history
  • Loading branch information
eyonland authored and arakhmati committed Jan 19, 2024
1 parent f007930 commit 5db295b
Show file tree
Hide file tree
Showing 16 changed files with 281 additions and 110 deletions.
21 changes: 13 additions & 8 deletions tests/ttnn/sweep_tests/run_failed_and_crashed_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,32 @@
from tests.ttnn.sweep_tests.sweep import run_failed_and_crashed_tests


def parse_exclude_string(exclude):
if exclude is None:
exclude = []
def convert_string_to_list(string):
if string is None:
output = []
else:
exclude = exclude.split(",")
exclude = [test_name.strip() for test_name in exclude]
return set(exclude)
output = string.split(",")
output = [element.strip() for element in output]
return set(output)


def main():
parser = argparse.ArgumentParser()
parser.add_argument("--include", type=str)
parser.add_argument("--exclude", type=str)
parser.add_argument("--stepwise", action="store_true")

include = parser.parse_args().include
exclude = parser.parse_args().exclude
stepwise = parser.parse_args().stepwise

exclude = parse_exclude_string(exclude)
include = convert_string_to_list(include)
exclude = convert_string_to_list(exclude)
if include and exclude:
raise ValueError("Cannot specify both include and exclude")

device = ttnn.open(0)
run_failed_and_crashed_tests(device=device, stepwise=stepwise, exclude=exclude)
run_failed_and_crashed_tests(device=device, stepwise=stepwise, include=include, exclude=exclude)
ttnn.close(device)


Expand Down
11 changes: 9 additions & 2 deletions tests/ttnn/sweep_tests/run_single_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,16 @@ def main():

if status == "passed":
logger.info(f"Passed")
elif status in {"failed", "crashed"}:
logger.info(f"Error: {message}")
elif status == "is_expected_to_fail":
logger.info(f'Failed as expected with the following error message: "{message}"')
elif status in "failed":
logger.info(f'Failed:"{message}"')
exit(-1)
elif status in "crashed":
logger.info(f'Crashed: "{message}"')
exit(-1)
elif status in "skipped":
logger.info(f'Skipped: "{message}"')
else:
raise RuntimeError(f"Unknown status {status}")

Expand Down
125 changes: 62 additions & 63 deletions tests/ttnn/sweep_tests/sweep.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
from loguru import logger
import pandas as pd

import ttnn

SWEEPS_DIR = pathlib.Path(__file__).parent
SWEEP_SOURCES_DIR = SWEEPS_DIR / "sweeps"
SWEEP_RESULTS_DIR = SWEEPS_DIR / "results"
Expand Down Expand Up @@ -63,71 +61,67 @@ def get_parameter_values(parameter_names, permutation):
yield parameter_value


def sweep(sweep_file_name, run, skip, parameters, *, device):
sweep_name = pathlib.Path(sweep_file_name).stem
parameter_names = get_parameter_names(parameters)
column_names = ["status", "exception"] + parameter_names

rows = []
for permutation in permutations(parameters):
parameter_values = list(get_parameter_values(parameter_names, permutation))

if skip(**permutation):
rows.append(["skipped", None] + parameter_values)
continue

try:
passed, message = run(**permutation, device=device)
if passed:
rows.append(["passed", None] + parameter_values)
else:
rows.append(["failed", message] + parameter_values)
except Exception as e:
rows.append(["crashed", str(e)] + parameter_values)
finally:
import tt_lib as ttl
def _run_single_test(run, skip, is_expected_to_fail, permutation, *, device):
try:
should_be_skipped, message = skip(**permutation)
if should_be_skipped:
return "skipped", message

ttl.device.ClearCommandQueueProgramCache(device)
ttl.device.DeallocateBuffers(device)
passed, message = run(**permutation, device=device)
status = "passed" if passed else "failed"
if passed:
message = None
except Exception as e:
should_fail, expected_exception = is_expected_to_fail(**permutation)
if should_fail and expected_exception == str(e):
status = "is_expected_to_fail"
message = expected_exception
else:
status = "crashed"
message = f"Exception: {e}"
finally:
import tt_lib as ttl

SWEEP_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
file_name = (SWEEP_RESULTS_DIR / sweep_name).with_suffix(".csv")
ttl.device.ClearCommandQueueProgramCache(device)
ttl.device.DeallocateBuffers(device)
return status, message

df = pd.DataFrame(rows, columns=column_names)
df.to_csv(file_name)

logger.info(f"Saved sweep results to {file_name}")
def run_single_test(test_name, index, *, device):
file_name = (SWEEP_SOURCES_DIR / test_name).with_suffix(".py")
logger.info(f"Running {file_name}")

sweep_module = SourceFileLoader(f"sweep_module_{file_name.stem}", str(file_name)).load_module()
permutation = list(permutations(sweep_module.parameters))[index]

def _run_single_test(run, skip, parameters, index, *, device):
permutation = list(permutations(parameters))[index]
pretty_printed_parameters = ",\n".join(f"\t{key}={value}" for key, value in permutation.items())
logger.info(f"Running sweep test at index {index}:\n{{{pretty_printed_parameters}}}")
if skip(**permutation):
return "skipped", None
passed, message = run(**permutation, device=device)
return passed, message
return _run_single_test(
sweep_module.run, sweep_module.skip, sweep_module.is_expected_to_fail, permutation, device=device
)


def run_single_test(test_name, index, *, device):
file_name = (SWEEP_SOURCES_DIR / test_name).with_suffix(".py")
logger.info(f"Running {file_name}")
def run_sweep(sweep_file_name, *, device):
sweep_name = pathlib.Path(sweep_file_name).stem
sweep_module = SourceFileLoader(f"sweep_module_{sweep_name}", str(sweep_file_name)).load_module()

sweep_module = SourceFileLoader("sweep_module", str(file_name)).load_module()
parameter_names = get_parameter_names(sweep_module.parameters)
column_names = ["status", "exception"] + parameter_names

status = None
try:
passed, message = _run_single_test(
sweep_module.run, sweep_module.skip, sweep_module.parameters, index, device=device
rows = []
for permutation in permutations(sweep_module.parameters):
status, message = _run_single_test(
sweep_module.run, sweep_module.skip, sweep_module.is_expected_to_fail, permutation, device=device
)
status = "passed" if passed else "failed"
if not passed:
logger.error(message)
except Exception as e:
status = "crashed"
message = f"Exception: {e}"
logger.exception(message)
return status, message
rows.append([status, message] + list(get_parameter_values(parameter_names, permutation)))

SWEEP_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
file_name = (SWEEP_RESULTS_DIR / sweep_name).with_suffix(".csv")

df = pd.DataFrame(rows, columns=column_names)
df.to_csv(file_name)

logger.info(f"Saved sweep results to {file_name}")


def run_all_tests(*, device):
Expand All @@ -138,15 +132,18 @@ def run_all_tests(*, device):

for file_name in sorted(SWEEP_SOURCES_DIR.glob("*.py")):
logger.info(f"Running {file_name}")
sweep_module = SourceFileLoader("sweep_module", str(file_name)).load_module()
sweep(file_name, sweep_module.run, sweep_module.skip, sweep_module.parameters, device=device)
run_sweep(file_name, device=device)


def run_failed_and_crashed_tests(*, device, stepwise, exclude):
def run_failed_and_crashed_tests(*, device, stepwise, include, exclude):
keep_running = True
for file_name in sorted(SWEEP_RESULTS_DIR.glob("*.csv")):
test_name = file_name.stem
if test_name in exclude:

if include and test_name not in include:
continue

if exclude and test_name in exclude:
continue

if not keep_running:
Expand All @@ -164,9 +161,11 @@ def run_failed_and_crashed_tests(*, device, stepwise, exclude):

status, message = run_single_test(file_name.stem, index, device=device)
logger.info(status)
if status in {"failed", "crashed"} and stepwise:
keep_running = False
break
if status in {"failed", "crashed"}:
logger.error(f"{message}")
if stepwise:
keep_running = False
break

df.at[index, "status"] = status
df.at[index, "message"] = message
Expand All @@ -175,10 +174,10 @@ def run_failed_and_crashed_tests(*, device, stepwise, exclude):


def print_summary():
stats_df = pd.DataFrame(columns=["name", "passed", "failed", "skipped", "crashed"])
stats_df = pd.DataFrame(columns=["name", "passed", "failed", "crashed", "skipped", "is_expected_to_fail"])

def add_row(df, name):
df.loc[-1] = [name, 0, 0, 0, 0]
df.loc[-1] = [name] + [0] * len(df.columns[1:])
df.index = df.index + 1
df.reset_index(inplace=True, drop=True)
return df
Expand Down
14 changes: 10 additions & 4 deletions tests/ttnn/sweep_tests/sweeps/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple

import torch

import ttnn
Expand All @@ -24,10 +26,14 @@
}


def skip(*, broadcast, input_b_layout, **_):
def skip(*, broadcast, input_b_layout, **_) -> Tuple[bool, Optional[str]]:
if broadcast in {"w", "hw"} and input_b_layout == ttnn.ROW_MAJOR_LAYOUT:
return True
return False
return True, "Broadcasting along width is not supported for row major layout"
return False, None


def is_expected_to_fail(**_) -> Tuple[bool, Optional[str]]:
    """Report whether a parameter permutation is expected to raise.

    Every keyword argument is accepted and ignored: no permutation of this
    sweep is expected to fail.

    Returns:
        Always ``(False, None)`` -- not expected to fail, no expected message.
    """
    expected_failure: Tuple[bool, Optional[str]] = (False, None)
    return expected_failure


def run(
Expand All @@ -44,7 +50,7 @@ def run(
output_memory_config,
*,
device,
):
) -> Tuple[bool, Optional[str]]:
input_shape_a = (*batch_sizes, height, width)
input_shape_b = (*batch_sizes, height, width)
if broadcast == "hw":
Expand Down
90 changes: 90 additions & 0 deletions tests/ttnn/sweep_tests/sweeps/concat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple

import torch
import ttnn
import random
from tests.ttnn.utils_for_testing import check_with_pcc
from models.utility_functions import torch_random

# Sweep space for the concat test: each key maps to the list of values tried for
# that argument of skip / is_expected_to_fail / run.
# Note that the concat dimensions go up to 5 while the ranks stop at 4, so some
# combinations name a dimension the tensors do not have; is_expected_to_fail
# marks those (dimension >= rank), as well as number_of_tensors == 1, as
# expected failures.
parameters = {
    "number_of_tensors": [1, 2, 3, 4, 5],
    "rank_of_tensors": [1, 2, 3, 4],
    # Upper bound handed to random.randint when run() draws each dimension size.
    "max_random_size_of_each_dim": [32],
    "dimension_to_concatenate_on": [0, 1, 2, 3, 4, 5],
    "layout": [ttnn.ROW_MAJOR_LAYOUT, ttnn.TILE_LAYOUT],
    "dtype": [ttnn.bfloat16],
    "memory_config": [ttnn.DRAM_MEMORY_CONFIG, ttnn.L1_MEMORY_CONFIG],
}


def skip(rank_of_tensors, layout, **_) -> Tuple[bool, Optional[str]]:
    """Decide whether a parameter permutation should be skipped.

    Args:
        rank_of_tensors: Rank of the tensors in the permutation.
        layout: Layout of the tensors in the permutation.
        **_: Remaining sweep parameters; ignored.

    Returns:
        ``(True, reason)`` when the rank is below 2 and the layout is
        ``ttnn.TILE_LAYOUT``; ``(False, None)`` otherwise.
    """
    if rank_of_tensors < 2:
        if layout == ttnn.TILE_LAYOUT:
            return True, "Tile layout is only supported for tensors with rank >= 2"
    return False, None


def is_expected_to_fail(
    number_of_tensors, rank_of_tensors, dimension_to_concatenate_on, **_
) -> Tuple[bool, Optional[str]]:
    """Report whether a permutation is expected to raise, and with what message.

    Args:
        number_of_tensors: How many tensors would be concatenated.
        rank_of_tensors: Rank shared by the tensors.
        dimension_to_concatenate_on: Dimension index requested for the concat.
        **_: Remaining sweep parameters; ignored.

    Returns:
        ``(True, message)`` when a single tensor is given, or when the
        requested dimension is not below the rank; ``(False, None)`` otherwise.
        The single-tensor check takes precedence over the dimension check.
    """
    if number_of_tensors == 1:
        return True, "You must have at least two tensors to concat!"

    if dimension_to_concatenate_on < rank_of_tensors:
        return False, None

    # Valid dimension indices for a tensor of this rank, negative indexing included.
    lowest, highest = -rank_of_tensors, rank_of_tensors - 1
    message = (
        f"Dimension out of range (expected to be in range of [{lowest}, {highest}], "
        f"but got {dimension_to_concatenate_on})"
    )
    return True, message


def run(
    number_of_tensors,
    rank_of_tensors,
    max_random_size_of_each_dim,
    dimension_to_concatenate_on,
    layout,
    dtype,
    memory_config,
    *,
    device,
) -> Tuple[bool, Optional[str]]:
    """Concatenate random tensors with ttnn and compare against torch.

    The first tensor gets a randomly drawn shape; every further tensor reuses
    that shape, with only the concat dimension redrawn (when that dimension
    exists for the given rank).

    Args:
        number_of_tensors: How many tensors to concatenate.
        rank_of_tensors: Rank of every input tensor.
        max_random_size_of_each_dim: Upper bound for each randomly drawn size.
        dimension_to_concatenate_on: Dimension passed to both concat calls.
        layout: Layout used when moving the inputs to the device.
        dtype: ttnn dtype used when moving the inputs to the device.
        memory_config: Memory config used when moving the inputs to the device.
        device: Device the inputs are placed on.

    Returns:
        The result of ``check_with_pcc`` on the torch and ttnn outputs.
    """
    # Re-seed on every call so each permutation draws the same sequence of sizes.
    random.seed(0)
    last_axis = rank_of_tensors - 1

    def draw_dim_size(axis):
        size = random.randint(1, max_random_size_of_each_dim)
        # NOTE(review): presumably row-major layout needs an even innermost
        # dimension -- confirm. Odd sizes are bumped by one, wrapping at the
        # maximum; a wrap to 0 is replaced by 2.
        if layout == ttnn.ROW_MAJOR_LAYOUT and axis == last_axis and size % 2 == 1:
            size = (size + 1) % max_random_size_of_each_dim or 2
        return size

    base_shape = [draw_dim_size(axis) for axis in range(rank_of_tensors)]
    reference_tensor = torch_random(base_shape, -0.1, 0.1, dtype=torch.bfloat16)
    torch_input_tensors = [reference_tensor]

    for _ in range(1, number_of_tensors):
        shape = list(reference_tensor.shape)
        # An out-of-range concat dimension leaves the shape untouched.
        if dimension_to_concatenate_on < rank_of_tensors:
            shape[dimension_to_concatenate_on] = draw_dim_size(dimension_to_concatenate_on)
        torch_input_tensors.append(torch_random(shape, -0.1, 0.1, dtype=torch.bfloat16))

    input_tensors = []
    for torch_input_tensor in torch_input_tensors:
        device_tensor = ttnn.from_torch(
            torch_input_tensor, device=device, layout=layout, dtype=dtype, memory_config=memory_config
        )
        input_tensors.append(device_tensor)

    # ttnn.concat runs before torch.concat, so for invalid inputs any exception
    # raised by ttnn is the one that propagates to the caller.
    output_tensor = ttnn.to_torch(ttnn.concat(input_tensors, dim=dimension_to_concatenate_on))

    torch_output_tensor = torch.concat(torch_input_tensors, dim=dimension_to_concatenate_on)
    return check_with_pcc(torch_output_tensor, output_tensor, 0.9999)
12 changes: 9 additions & 3 deletions tests/ttnn/sweep_tests/sweeps/layer_norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Tuple

import torch

import ttnn
Expand All @@ -21,8 +23,12 @@
}


def skip(**_):
return False
def skip(**_) -> Tuple[bool, Optional[str]]:
return False, None


def is_expected_to_fail(**_) -> Tuple[bool, Optional[str]]:
    """Report whether a parameter permutation is expected to raise.

    All keyword arguments are accepted and ignored: no permutation of this
    sweep is expected to fail.

    Returns:
        Always ``(False, None)`` -- not expected to fail, no expected message.
    """
    expected_failure: Tuple[bool, Optional[str]] = (False, None)
    return expected_failure


def run(
Expand All @@ -36,7 +42,7 @@ def run(
output_memory_config,
*,
device,
):
) -> Tuple[bool, Optional[str]]:
input_shape = (*batch_sizes, height, width)

torch_input_tensor = torch_random(input_shape, -0.1, 0.1, dtype=torch.float32)
Expand Down
Loading

0 comments on commit 5db295b

Please sign in to comment.