Skip to content

Commit

Permalink
update the sparse ml formatting code
Browse files Browse the repository at this point in the history
  • Loading branch information
bobcheng15 committed Apr 18, 2024
1 parent cb38fb2 commit 4568766
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 15 deletions.
10 changes: 4 additions & 6 deletions scripts/formatting/datastructure_sparse_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,12 @@ def write_datastructure_tiles(args, tensor, out_path, tile_name):
def write_datastructure_bench(args, tensor, out_path, tiles=None):
shifter = ScipyTensorShifter()

print("Writing " + args.name + " in " + args.format + " for test " + args.benchname + "...")
dirname = args.output_dir_path if args.output_dir_path is not None else os.path.join(out_path, args.modelname, args.datasetname, args.layername, args.benchname)
print("Writing " + args.name + " in " + args.format + " to " + dirname + "...")

dirname = args.output_dir_path if args.output_dir_path is not None else os.path.join(out_path, args.modelname, args.datasetname, args.layername, args.benchname, args.name)
print("dirname: " + dirname)
if tiles is not None:
dirname = os.path.join(dirname, tiles)
dirpath = Path(dirname)
if os.path.exists(dirpath):
shutil.rmtree(dirpath)
dirpath.mkdir(parents=True, exist_ok=True, mode=0o777)

coo = inputCache.load(tensor, False)
Expand Down Expand Up @@ -91,7 +88,8 @@ def write_datastructure_bench(args, tensor, out_path, tiles=None):
out_dirname = args.output_dir_path

out_path = Path(out_dirname)
out_path.mkdir(parents=True, exist_ok=True, mode=0o777)
if not os.path.isdir(out_dirname):
out_path.mkdir(parents=True, exist_ok=True, mode=0o777)

if args.name is None:
print("Please enter a matrix name")
Expand Down
49 changes: 40 additions & 9 deletions scripts/formatting/generate_sparse_ml_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,66 @@
import subprocess
import json

def format_lookup(tensor_name, operation):
    """Return the storage-format string for a kernel's input tensor.

    The format is chosen from the kernel's operation type and the tensor's
    name. Any combination that is not listed below falls back to "ss01".

        operation                  "B"      "C"
        matmul                     ss01     ss10
        spmm_sd / spmm_sd_relu     ss01     dd10
        dense_matmul               dd01     dd10

    NOTE(review): presumably "s"/"d" mark sparse/dense levels and the digits
    give the mode ordering -- confirm against the formatting script that
    consumes the value through its --format flag.

    Args:
        tensor_name: Name of the tensor (e.g. "B" or "C").
        operation: Operation type of the kernel (e.g. "matmul").

    Returns:
        The format string for the tensor.
    """
    # Fallback used for unknown operations and for tensor names that a known
    # operation does not list explicitly.
    tensor_format = "ss01"
    if operation == "matmul":
        if tensor_name == "B":
            tensor_format = "ss01"
        elif tensor_name == "C":
            tensor_format = "ss10"
    elif operation in ("spmm_sd", "spmm_sd_relu"):
        if tensor_name == "B":
            tensor_format = "ss01"
        elif tensor_name == "C":
            tensor_format = "dd10"
    elif operation == "dense_matmul":
        # No debugger hook here: this function runs inside a non-interactive
        # batch loop, so a stray breakpoint() would stall or abort the run.
        if tensor_name == "B":
            tensor_format = "dd01"
        elif tensor_name == "C":
            tensor_format = "dd10"
    return tensor_format


sparse_ml_bench = ['gcn']
sparse_ml_path = os.environ['SPARSE_ML_PATH']
out_dir = os.environ['SPARSE_ML_FORMATTED_PATH']
basedir = os.getcwd()

for bench in sparse_ml_bench:
bench_dir = os.path.join(sparse_ml_path, bench)
kernel_types = {}
with open(os.path.join(bench_dir, "kernel_types.json"), "r") as type_file:
kernel_types = json.load(type_file)
print("BENCHMARK:", bench)
for dataset in os.listdir(bench_dir):
dataset_dir = os.path.join(bench_dir, dataset)
if not os.path.isdir(dataset_dir):
# kernel_types.json is stored at this level of the hierarchy; skip anything that is not a dataset directory
continue
print("DATASET:", dataset)
for layer in os.listdir(dataset_dir):
print("LAYER:", layer)
layer_dir = os.path.join(dataset_dir, layer)
for kernel in os.listdir(layer_dir):
kernel_dir = os.path.join(layer_dir, kernel)
print("Formatting Matrices for kernel", kernel, " of the layer",
layer, "of model", bench," trained on dataset", dataset)
with open(os.path.join(kernel_dir, "info.json"), 'r') as info_file:
info = json.load(info_file)
for tensor in info["input"]:
print("Formatting tensor", tensor["name"], "in format", tensor["format"])
tensor_name = tensor["name"]
tensor_formant = tensor["format"]
kernel_type = kernel_types[layer][kernel]
print("[KERNEL INFO]")
print("Name:", kernel)
print("Operation", kernel_type)
for tensor in os.listdir(kernel_dir):
# tensors are stored in npy file of the name {tensor_name}.npy
tensor_name = tensor.split(".")[0]
tensor_format = format_lookup(tensor_name, kernel_type)
print("Formatting tensor", tensor.split(".")[0], "in format", tensor_format)
format_script = os.path.join(basedir, "scripts/formatting/datastructure_sparse_ml.py")
formatting_env = os.environ.copy()
formatting_env["SPARSE_ML_TENSOR_PATH"] = os.path.join(kernel_dir, tensor_name + ".npy")
subprocess.run(["python",
str(format_script),
"-n", tensor_name,
"--hw",
"--format", tensor_formant,
"--format", tensor_format,
"-b", kernel,
"--modelname", bench,
"--datasetname", dataset,
Expand Down

0 comments on commit 4568766

Please sign in to comment.