Hetero cgra lopsided layout #1965

Merged · 4 commits · Dec 11, 2024
90 changes: 65 additions & 25 deletions aha/util/regress.py
@@ -26,6 +26,7 @@ def add_subparser(subparser):
parser.add_argument("--unroll", default=1, type=int)
parser.add_argument("--using-matrix-unit", action="store_true")
parser.add_argument("--mu-datawidth", default=16, type=int)
parser.add_argument("--num-fabric-cols-removed", default=0, type=int)
parser.set_defaults(dispatch=dispatch)


@@ -68,7 +69,7 @@ def buildkite_call(command, env={}, return_output=False, out_file=None):
else:
raise

def gen_garnet(width, height, dense_only=False, using_matrix_unit=False, mu_datawidth=16):
def gen_garnet(width, height, dense_only=False, using_matrix_unit=False, mu_datawidth=16, num_fabric_cols_removed=0):
print("--- Generating Garnet", flush=True)
start = time.time()
if not os.path.exists("/aha/garnet/garnet.v"):
@@ -93,13 +94,16 @@ def gen_garnet(width, height, dense_only=False, using_matrix_unit=False, mu_data
buildkite_args.append("--using-matrix-unit")
buildkite_args.append("--mu-datawidth")
buildkite_args.append(str(mu_datawidth))
buildkite_args.append("--give-north-io-sbs")
buildkite_args.append("--num-fabric-cols-removed")
buildkite_args.append(str(num_fabric_cols_removed))

buildkite_call(buildkite_args)

return time.time() - start


def generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=False, unroll=1, using_matrix_unit=False):
def generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=False, unroll=1, using_matrix_unit=False, num_fabric_cols_removed=0):
if len(sparse_tests) == 0:
return 0

@@ -133,6 +137,9 @@ def generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile
build_tb_cmd.append("--opal-workaround")
if using_matrix_unit:
build_tb_cmd.append("--using-matrix-unit")
build_tb_cmd.append("--give-north-io-sbs")
build_tb_cmd.append("--num-fabric-cols-removed")
build_tb_cmd.append(str(num_fabric_cols_removed))
buildkite_call(
build_tb_cmd,
env=env_vars,
@@ -166,6 +173,9 @@ def generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile
build_tb_cmd.append("--opal-workaround")
if using_matrix_unit:
build_tb_cmd.append("--using-matrix-unit")
build_tb_cmd.append("--give-north-io-sbs")
build_tb_cmd.append("--num-fabric-cols-removed")
build_tb_cmd.append(str(num_fabric_cols_removed))
buildkite_call(
build_tb_cmd,
env=env_vars,
@@ -212,15 +222,16 @@ def format_concat_tiles(test, data_tile_pairs, kernel_name, pipeline_num=32, unr
return all_tiles, num_list


def test_sparse_app(testname, seed_flow, data_tile_pairs, pipeline_num_l=None, opal_workaround=False, test="", test_dataset_runtime_dict=None, using_matrix_unit=False, cgra_height=32, mu_datawidth=16):
def test_sparse_app(testname, seed_flow, data_tile_pairs, pipeline_num_l=None, opal_workaround=False, test="", test_dataset_runtime_dict=None, using_matrix_unit=False, cgra_height=32, mu_datawidth=16, num_fabric_cols_removed=0):
if test == "":
test = testname

print(f"--- {test}")

env_vars = {"PYTHONPATH": "/aha/garnet/"}
if using_matrix_unit:
env_vars["WEST_IN_IO_SIDES"] = "1"
if num_fabric_cols_removed == 0:
env_vars["WEST_IN_IO_SIDES"] = "1"
env_vars["USING_MATRIX_UNIT"] = "1"
env_vars["OC_0"] = str(2*cgra_height)
env_vars["MU_DATAWIDTH"] = str(mu_datawidth)
@@ -294,7 +305,7 @@ def test_sparse_app(testname, seed_flow, data_tile_pairs, pipeline_num_l=None, o
return 0, 0, time_test


def test_dense_app(test, width, height, env_parameters, extra_args, layer=None, dense_only=False, use_fp=False, using_matrix_unit=False, cgra_height=32, mu_datawidth=16):
def test_dense_app(test, width, height, env_parameters, extra_args, layer=None, dense_only=False, use_fp=False, using_matrix_unit=False, cgra_height=32, mu_datawidth=16, num_fabric_cols_removed=0):
env_parameters = str(env_parameters)
testname = layer if layer is not None else test
print(f"--- {testname}")
@@ -342,7 +353,7 @@ def test_dense_app(test, width, height, env_parameters, extra_args, layer=None,

if using_matrix_unit:
buildkite_args.append("--using-matrix-unit")
env_vars["WEST_IN_IO_SIDES"] = "1"
buildkite_args.append("--give-north-io-sbs")
buildkite_args.append("--num-fabric-cols-removed")
buildkite_args.append(str(num_fabric_cols_removed))

if num_fabric_cols_removed == 0:
env_vars["WEST_IN_IO_SIDES"] = "1"

env_vars["USING_MATRIX_UNIT"] = "1"
env_vars["OC_0"] = str(2*cgra_height)
env_vars["MU_DATAWIDTH"] = str(mu_datawidth)
@@ -362,7 +379,7 @@ def test_dense_app(test, width, height, env_parameters, extra_args, layer=None,
return time_compile, time_map, time_test


def test_hardcoded_dense_app(test, width, height, env_parameters, extra_args, layer=None, dense_only=False, using_matrix_unit=False, cgra_height=32, mu_datawidth=16):
def test_hardcoded_dense_app(test, width, height, env_parameters, extra_args, layer=None, dense_only=False, using_matrix_unit=False, cgra_height=32, mu_datawidth=16, num_fabric_cols_removed=0):
env_parameters = str(env_parameters)
testname = layer if layer is not None else test
print(f"--- {testname}")
@@ -431,7 +448,11 @@ def test_hardcoded_dense_app(test, width, height, env_parameters, extra_args, la

if using_matrix_unit:
buildkite_args.append("--using-matrix-unit")
env_vars["WEST_IN_IO_SIDES"] = "1"
buildkite_args.append("--give-north-io-sbs")

if num_fabric_cols_removed == 0:
env_vars["WEST_IN_IO_SIDES"] = "1"

env_vars["USING_MATRIX_UNIT"] = "1"
env_vars["OC_0"] = str(2*cgra_height)
env_vars["MU_DATAWIDTH"] = str(mu_datawidth)
@@ -454,8 +475,13 @@ def dispatch(args, extra_args=None):
pipeline_num = args.pipeline_num
using_matrix_unit = args.using_matrix_unit
mu_datawidth = args.mu_datawidth
num_fabric_cols_removed = args.num_fabric_cols_removed
unroll = args.unroll

# Can only remove a number of columns that is a multiple of 4
assert num_fabric_cols_removed % 4 == 0, "ERROR: Number of cols removed must be a multiple of 4"
assert num_fabric_cols_removed <= 8, "ERROR: Removing more than 8 columns is not supported yet. Hardware modifications may be necessary to proceed."

# Preserve backward compatibility
if args.config == "daily": args.config = "pr_aha" # noqa
if args.config == "pr": args.config = "pr_submod" # noqa
@@ -499,7 +525,7 @@

print(f"--- Running regression: {args.config}", flush=True)
info = []
t = gen_garnet(width, height, dense_only=False, using_matrix_unit=using_matrix_unit, mu_datawidth=mu_datawidth)
t = gen_garnet(width, height, dense_only=False, using_matrix_unit=using_matrix_unit, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append(["garnet with sparse and dense", t])

data_tile_pairs = []
@@ -526,16 +552,19 @@
print("HERE ARE THE DATA TILE PAIRS!")
print(data_tile_pairs)

generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=args.opal_workaround, unroll=unroll, using_matrix_unit=using_matrix_unit)
generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name,
opal_workaround=args.opal_workaround, unroll=unroll, using_matrix_unit=using_matrix_unit, num_fabric_cols_removed=num_fabric_cols_removed)

for test in sparse_tests:
if use_pipeline:
assert (not seed_flow), "Pipeline mode is not supported with seed flow"
tile_pairs, pipeline_num_l = format_concat_tiles(test, data_tile_pairs, kernel_name, pipeline_num, unroll)
t0, t1, t2 = test_sparse_app(test, seed_flow, tile_pairs, pipeline_num_l, opal_workaround=args.opal_workaround, test_dataset_runtime_dict=test_dataset_runtime_dict, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
t0, t1, t2 = test_sparse_app(test, seed_flow, tile_pairs, pipeline_num_l, opal_workaround=args.opal_workaround, test_dataset_runtime_dict=test_dataset_runtime_dict,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])
else:
t0, t1, t2 = test_sparse_app(test, seed_flow, data_tile_pairs, opal_workaround=args.opal_workaround, test_dataset_runtime_dict=test_dataset_runtime_dict, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
t0, t1, t2 = test_sparse_app(test, seed_flow, data_tile_pairs, opal_workaround=args.opal_workaround, test_dataset_runtime_dict=test_dataset_runtime_dict,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

# remove the generated collateral for tiles that passed to avoid overrunning the disk
@@ -547,46 +576,55 @@
for dataset, time_value in dataset_runtime_dict.items():
perf_out_file.write(f"{testname} {dataset} {time_value}\n")
else:
generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=args.opal_workaround, unroll=unroll, using_matrix_unit=using_matrix_unit)
generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name,
opal_workaround=args.opal_workaround, unroll=unroll, using_matrix_unit=using_matrix_unit, num_fabric_cols_removed=num_fabric_cols_removed)

for test in sparse_tests:
assert(not use_pipeline), "Pipeline mode is not supported with seed flow"
t0, t1, t2 = test_sparse_app(test, seed_flow, data_tile_pairs, opal_workaround=args.opal_workaround, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
t0, t1, t2 = test_sparse_app(test, seed_flow, data_tile_pairs, opal_workaround=args.opal_workaround,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in glb_tests:
t0, t1, t2 = test_dense_app(test,
width, height, args.env_parameters, extra_args, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
width, height, args.env_parameters, extra_args,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in glb_tests_fp:
t0, t1, t2 = test_dense_app(test,
width, height, args.env_parameters, extra_args, use_fp=True, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
width, height, args.env_parameters, extra_args, use_fp=True,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in resnet_tests:
if "residual" in test:
t0, t1, t2 = test_dense_app("apps/resnet_residual",
width, height, args.env_parameters, extra_args, layer=test, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
width, height, args.env_parameters, extra_args, layer=test,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])
else:
t0, t1, t2 = test_dense_app("apps/resnet_output_stationary",
width, height, args.env_parameters, extra_args, layer=test, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
width, height, args.env_parameters, extra_args, layer=test,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in resnet_tests_fp:
if "residual" in test:
t0, t1, t2 = test_dense_app("apps/conv2D_residual_fp",
width, height, args.env_parameters, extra_args, layer=test, use_fp=True, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
width, height, args.env_parameters, extra_args, layer=test, use_fp=True,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])
else:
t0, t1, t2 = test_dense_app("apps/conv2D_fp",
width, height, args.env_parameters, extra_args, layer=test, use_fp=True, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
width, height, args.env_parameters, extra_args, layer=test, use_fp=True,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in hardcoded_dense_tests:
t0, t1, t2 = test_hardcoded_dense_app(test,
width, height, args.env_parameters, extra_args, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
width, height, args.env_parameters, extra_args,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

if args.include_dense_only_tests:
@@ -596,27 +634,29 @@
if os.WEXITSTATUS(exit_status) != 0:
raise RuntimeError(f"Command 'rm /aha/garnet/garnet.v' returned non-zero exit status {os.WEXITSTATUS(exit_status)}.")

t = gen_garnet(width, height, dense_only=True, using_matrix_unit=using_matrix_unit, mu_datawidth=mu_datawidth)
t = gen_garnet(width, height, dense_only=True, using_matrix_unit=using_matrix_unit, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append(["garnet with dense only", t])

num_dense_only_glb_tests = 5
for test_index, test in enumerate(glb_tests):
if test_index == num_dense_only_glb_tests:
break
t0, t1, t2 = test_dense_app(test,
width, height, args.env_parameters, extra_args, dense_only=True, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
width, height, args.env_parameters, extra_args, dense_only=True,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb dense only", t0 + t1 + t2, t0, t1, t2])

for test in resnet_tests:
# residual resnet test is not working with dense only mode
if "residual" not in test:
t0, t1, t2 = test_dense_app("apps/resnet_output_stationary",
width, height, args.env_parameters, extra_args, layer=test, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
width, height, args.env_parameters, extra_args, layer=test,
using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth, num_fabric_cols_removed=num_fabric_cols_removed)
info.append([test + "_glb dense only", t0 + t1 + t2, t0, t1, t2])

print(f"+++ TIMING INFO", flush=True)
print(tabulate(info, headers=["step", "total", "compile", "map", "test"], floatfmt=".0f"), flush=True)


def gather_tests(tags):
    pass
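
Note on the pattern above: every call site in regress.py makes the same three-part change — validate the new --num-fabric-cols-removed value, forward it (together with --give-north-io-sbs) to the garnet and testbench build commands, and export WEST_IN_IO_SIDES only when no fabric columns are removed. The following condensed sketch illustrates that pattern; build_matrix_unit_config is a hypothetical helper for illustration and is not a function in regress.py.

# Illustrative sketch only: condenses the configuration pattern added in this PR.
# build_matrix_unit_config is a hypothetical name, not part of regress.py.
def build_matrix_unit_config(using_matrix_unit, cgra_height=32, mu_datawidth=16,
                             num_fabric_cols_removed=0):
    # Same constraints as the asserts added to dispatch() above.
    assert num_fabric_cols_removed % 4 == 0, "Number of cols removed must be a multiple of 4"
    assert num_fabric_cols_removed <= 8, "Removing more than 8 columns is not supported yet"

    extra_args = []
    env_vars = {}
    if using_matrix_unit:
        extra_args += ["--using-matrix-unit",
                       "--give-north-io-sbs",
                       "--num-fabric-cols-removed", str(num_fabric_cols_removed)]
        # The west IO sides are kept only when no fabric columns are removed.
        if num_fabric_cols_removed == 0:
            env_vars["WEST_IN_IO_SIDES"] = "1"
        env_vars["USING_MATRIX_UNIT"] = "1"
        env_vars["OC_0"] = str(2 * cgra_height)
        env_vars["MU_DATAWIDTH"] = str(mu_datawidth)
    return extra_args, env_vars

if __name__ == "__main__":
    args, env = build_matrix_unit_config(True, num_fabric_cols_removed=4)
    print(args)  # ['--using-matrix-unit', '--give-north-io-sbs', '--num-fabric-cols-removed', '4']
    print(env)   # WEST_IN_IO_SIDES is absent because columns were removed

With num_fabric_cols_removed=0 the behavior matches the pre-PR matrix-unit path; any non-zero value must be a multiple of 4 and at most 8, per the asserts added to dispatch().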
2 changes: 1 addition & 1 deletion canal
2 changes: 1 addition & 1 deletion garnet
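
Hypothetical usage, assuming the usual aha regress entry point that wraps add_subparser above: aha regress pr_aha --using-matrix-unit --num-fabric-cols-removed 4. The flag defaults to 0, so existing invocations are unaffected.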