Skip to content

Commit

Permalink
Merge pull request #1960 from StanfordAHA/hetero-cgra
Browse files (browse the repository at this point in the history)
MU2F I/O tiles
  • Loading branch information
mcoduoza authored Dec 3, 2024
2 parents: 129bebe + 66e2f60; commit 40a9ca2
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 29 deletions.
85 changes: 60 additions & 25 deletions aha/util/regress.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,6 +24,8 @@ def add_subparser(subparser):
parser.add_argument("--pipeline-num", default=32, type=int)
parser.add_argument("--sparse-tile-pairs-list", default="", type=str, nargs="*")
parser.add_argument("--unroll", default=1, type=int)
parser.add_argument("--using-matrix-unit", action="store_true")
parser.add_argument("--mu-datawidth", default=16, type=int)
parser.set_defaults(dispatch=dispatch)


Expand Down Expand Up @@ -66,15 +68,14 @@ def buildkite_call(command, env={}, return_output=False, out_file=None):
else:
raise

def gen_garnet(width, height, dense_only=False):
def gen_garnet(width, height, dense_only=False, using_matrix_unit=False, mu_datawidth=16):
print("--- Generating Garnet", flush=True)
start = time.time()
if not os.path.exists("/aha/garnet/garnet.v"):
# Daemon is no good if/when we build new/different verilog
buildkite_call("aha garnet --daemon kill".split())

# No garnet verilog yet, so build it now.

buildkite_args = [
"aha",
"garnet",
Expand All @@ -88,12 +89,17 @@ def gen_garnet(width, height, dense_only=False):
if dense_only:
buildkite_args.append("--dense-only")

if using_matrix_unit:
buildkite_args.append("--using-matrix-unit")
buildkite_args.append("--mu-datawidth")
buildkite_args.append(str(mu_datawidth))

buildkite_call(buildkite_args)

return time.time() - start


def generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=False, unroll=1):
def generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=False, unroll=1, using_matrix_unit=False):
if len(sparse_tests) == 0:
return 0

Expand Down Expand Up @@ -125,6 +131,8 @@ def generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile
]
if opal_workaround:
build_tb_cmd.append("--opal-workaround")
if using_matrix_unit:
build_tb_cmd.append("--using-matrix-unit")
buildkite_call(
build_tb_cmd,
env=env_vars,
Expand Down Expand Up @@ -156,6 +164,8 @@ def generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile
]
if opal_workaround:
build_tb_cmd.append("--opal-workaround")
if using_matrix_unit:
build_tb_cmd.append("--using-matrix-unit")
buildkite_call(
build_tb_cmd,
env=env_vars,
Expand Down Expand Up @@ -202,13 +212,18 @@ def format_concat_tiles(test, data_tile_pairs, kernel_name, pipeline_num=32, unr
return all_tiles, num_list


def test_sparse_app(testname, seed_flow, data_tile_pairs, pipeline_num_l=None, opal_workaround=False, test="", test_dataset_runtime_dict=None):
def test_sparse_app(testname, seed_flow, data_tile_pairs, pipeline_num_l=None, opal_workaround=False, test="", test_dataset_runtime_dict=None, using_matrix_unit=False, cgra_height=32, mu_datawidth=16):
if test == "":
test = testname

print(f"--- {test}")

env_vars = {"PYTHONPATH": "/aha/garnet/"}
if using_matrix_unit:
env_vars["WEST_IN_IO_SIDES"] = "1"
env_vars["USING_MATRIX_UNIT"] = "1"
env_vars["OC_0"] = str(2*cgra_height)
env_vars["MU_DATAWIDTH"] = str(mu_datawidth)

app_path = f"{testname}_0/GLB_DIR/{testname}_combined_seed_0"
print(app_path, flush=True)
Expand Down Expand Up @@ -279,7 +294,7 @@ def test_sparse_app(testname, seed_flow, data_tile_pairs, pipeline_num_l=None, o
return 0, 0, time_test


def test_dense_app(test, width, height, env_parameters, extra_args, layer=None, dense_only=False, use_fp=False):
def test_dense_app(test, width, height, env_parameters, extra_args, layer=None, dense_only=False, use_fp=False, using_matrix_unit=False, cgra_height=32, mu_datawidth=16):
env_parameters = str(env_parameters)
testname = layer if layer is not None else test
print(f"--- {testname}")
Expand Down Expand Up @@ -323,22 +338,31 @@ def test_dense_app(test, width, height, env_parameters, extra_args, layer=None,
if dense_only:
buildkite_args.append("--dense-only")

env_vars = {}

if using_matrix_unit:
buildkite_args.append("--using-matrix-unit")
env_vars["WEST_IN_IO_SIDES"] = "1"
env_vars["USING_MATRIX_UNIT"] = "1"
env_vars["OC_0"] = str(2*cgra_height)
env_vars["MU_DATAWIDTH"] = str(mu_datawidth)

buildkite_call(buildkite_args)

time_map = time.time() - start

print(f"--- {testname} - glb testing", flush=True)
start = time.time()
if use_fp:
buildkite_call(["aha", "test", test, "--dense-fp"])
buildkite_call(["aha", "test", test, "--dense-fp"], env=env_vars)
else:
buildkite_call(["aha", "test", test])
buildkite_call(["aha", "test", test], env=env_vars)
time_test = time.time() - start

return time_compile, time_map, time_test


def test_hardcoded_dense_app(test, width, height, env_parameters, extra_args, layer=None, dense_only=False):
def test_hardcoded_dense_app(test, width, height, env_parameters, extra_args, layer=None, dense_only=False, using_matrix_unit=False, cgra_height=32, mu_datawidth=16):
env_parameters = str(env_parameters)
testname = layer if layer is not None else test
print(f"--- {testname}")
Expand Down Expand Up @@ -401,14 +425,23 @@ def test_hardcoded_dense_app(test, width, height, env_parameters, extra_args, la

if dense_only:
buildkite_args.append("--dense-only")

env_vars = {}

if using_matrix_unit:
buildkite_args.append("--using-matrix-unit")
env_vars["WEST_IN_IO_SIDES"] = "1"
env_vars["USING_MATRIX_UNIT"] = "1"
env_vars["OC_0"] = str(2*cgra_height)
env_vars["MU_DATAWIDTH"] = str(mu_datawidth)

buildkite_call(buildkite_args)

time_map = time.time() - start

print(f"--- {testname} - glb testing", flush=True)
start = time.time()
buildkite_call(["aha", "test", test])
buildkite_call(["aha", "test", test], env=env_vars)
time_test = time.time() - start

return time_compile, time_map, time_test
Expand All @@ -418,6 +451,8 @@ def dispatch(args, extra_args=None):
seed_flow = not args.non_seed_flow
use_pipeline = args.use_pipeline
pipeline_num = args.pipeline_num
using_matrix_unit = args.using_matrix_unit
mu_datawidth = args.mu_datawidth
unroll = args.unroll

# Preserve backward compatibility
Expand Down Expand Up @@ -463,7 +498,7 @@ def dispatch(args, extra_args=None):

print(f"--- Running regression: {args.config}", flush=True)
info = []
t = gen_garnet(width, height, dense_only=False)
t = gen_garnet(width, height, dense_only=False, using_matrix_unit=using_matrix_unit, mu_datawidth=mu_datawidth)
info.append(["garnet with sparse and dense", t])

data_tile_pairs = []
Expand All @@ -490,16 +525,16 @@ def dispatch(args, extra_args=None):
print("HERE ARE THE DATA TILE PAIRS!")
print(data_tile_pairs)

generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=args.opal_workaround, unroll=unroll)
generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=args.opal_workaround, unroll=unroll, using_matrix_unit=using_matrix_unit)

for test in sparse_tests:
if use_pipeline:
assert (not seed_flow), "Pipeline mode is not supported with seed flow"
tile_pairs, pipeline_num_l = format_concat_tiles(test, data_tile_pairs, kernel_name, pipeline_num, unroll)
t0, t1, t2 = test_sparse_app(test, seed_flow, tile_pairs, pipeline_num_l, opal_workaround=args.opal_workaround, test_dataset_runtime_dict=test_dataset_runtime_dict)
t0, t1, t2 = test_sparse_app(test, seed_flow, tile_pairs, pipeline_num_l, opal_workaround=args.opal_workaround, test_dataset_runtime_dict=test_dataset_runtime_dict, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])
else:
t0, t1, t2 = test_sparse_app(test, seed_flow, data_tile_pairs, opal_workaround=args.opal_workaround, test_dataset_runtime_dict=test_dataset_runtime_dict)
t0, t1, t2 = test_sparse_app(test, seed_flow, data_tile_pairs, opal_workaround=args.opal_workaround, test_dataset_runtime_dict=test_dataset_runtime_dict, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

# remove the generated collateral for tiles that passed to avoid overrunning the disk
Expand All @@ -511,46 +546,46 @@ def dispatch(args, extra_args=None):
for dataset, time_value in dataset_runtime_dict.items():
perf_out_file.write(f"{testname} {dataset} {time_value}\n")
else:
generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=args.opal_workaround, unroll=unroll)
generate_sparse_bitstreams(sparse_tests, width, height, seed_flow, data_tile_pairs, kernel_name, opal_workaround=args.opal_workaround, unroll=unroll, using_matrix_unit=using_matrix_unit)

for test in sparse_tests:
assert(not use_pipeline), "Pipeline mode is not supported with seed flow"
t0, t1, t2 = test_sparse_app(test, seed_flow, data_tile_pairs, opal_workaround=args.opal_workaround)
t0, t1, t2 = test_sparse_app(test, seed_flow, data_tile_pairs, opal_workaround=args.opal_workaround, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in glb_tests:
t0, t1, t2 = test_dense_app(test,
width, height, args.env_parameters, extra_args)
width, height, args.env_parameters, extra_args, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in glb_tests_fp:
t0, t1, t2 = test_dense_app(test,
width, height, args.env_parameters, extra_args, use_fp=True)
width, height, args.env_parameters, extra_args, use_fp=True, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in resnet_tests:
if "residual" in test:
t0, t1, t2 = test_dense_app("apps/resnet_residual",
width, height, args.env_parameters, extra_args, layer=test)
width, height, args.env_parameters, extra_args, layer=test, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])
else:
t0, t1, t2 = test_dense_app("apps/resnet_output_stationary",
width, height, args.env_parameters, extra_args, layer=test)
width, height, args.env_parameters, extra_args, layer=test, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in resnet_tests_fp:
if "residual" in test:
t0, t1, t2 = test_dense_app("apps/conv2D_residual_fp",
width, height, args.env_parameters, extra_args, layer=test, use_fp=True)
width, height, args.env_parameters, extra_args, layer=test, use_fp=True, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])
else:
t0, t1, t2 = test_dense_app("apps/conv2D_fp",
width, height, args.env_parameters, extra_args, layer=test, use_fp=True)
width, height, args.env_parameters, extra_args, layer=test, use_fp=True, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

for test in hardcoded_dense_tests:
t0, t1, t2 = test_hardcoded_dense_app(test,
width, height, args.env_parameters, extra_args)
width, height, args.env_parameters, extra_args, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb", t0 + t1 + t2, t0, t1, t2])

if args.include_dense_only_tests:
Expand All @@ -560,22 +595,22 @@ def dispatch(args, extra_args=None):
if os.WEXITSTATUS(exit_status) != 0:
raise RuntimeError(f"Command 'rm /aha/garnet/garnet.v' returned non-zero exit status {os.WEXITSTATUS(exit_status)}.")

t = gen_garnet(width, height, dense_only=True)
t = gen_garnet(width, height, dense_only=True, using_matrix_unit=using_matrix_unit, mu_datawidth=mu_datawidth)
info.append(["garnet with dense only", t])

num_dense_only_glb_tests = 5
for test_index, test in enumerate(glb_tests):
if test_index == num_dense_only_glb_tests:
break
t0, t1, t2 = test_dense_app(test,
width, height, args.env_parameters, extra_args, dense_only=True)
width, height, args.env_parameters, extra_args, dense_only=True, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb dense only", t0 + t1 + t2, t0, t1, t2])

for test in resnet_tests:
# residual resnet test is not working with dense only mode
if "residual" not in test:
t0, t1, t2 = test_dense_app("apps/resnet_output_stationary",
width, height, args.env_parameters, extra_args, layer=test)
width, height, args.env_parameters, extra_args, layer=test, using_matrix_unit=using_matrix_unit, cgra_height=height, mu_datawidth=mu_datawidth)
info.append([test + "_glb dense only", t0 + t1 + t2, t0, t1, t2])

print(f"+++ TIMING INFO", flush=True)
Expand Down
2 changes: 1 addition & 1 deletion archipelago
2 changes: 1 addition & 1 deletion canal
2 changes: 1 addition & 1 deletion lake

0 comments on commit 40a9ca2

Please sign in to comment.