diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index 055a945b..563cad6c 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -25,6 +25,10 @@ jobs:
         python -m virtualenv venv
         source venv/bin/activate
         pip install -r requirements.txt
+        git clone https://github.com/cdonovick/peak.git
+        pip install -e peak
+        git clone https://github.com/StanfordAHA/lassen.git
+        pip install -e lassen
         pip install -e .
         echo $VIRTUAL_ENV/bin >> $GITHUB_PATH
     - name: Test all (non SuiteSparse and Frostt) tests with pytest
@@ -44,12 +48,12 @@ jobs:
       run: |
         conda install flake8
         # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude venv
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude venv,peak,lassen
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude venv
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude venv,peak,lassen
     - name: Python style checking flake8
       run: |
-        flake8 . --count --select=E,W --statistics --ignore=W503,W504 --max-line-length=127 --exclude venv
+        flake8 . --count --select=E,W --statistics --ignore=W503,W504 --max-line-length=127 --exclude venv,peak,lassen
     - name: Check SAM Simulator generating script
       run: |
        make tests
diff --git a/compiler/sam-outputs/onyx-dot/mat_elemadd_leakyrelu_exp.gv b/compiler/sam-outputs/onyx-dot/mat_elemadd_leakyrelu_exp.gv
new file mode 100644
index 00000000..aaad0587
--- /dev/null
+++ b/compiler/sam-outputs/onyx-dot/mat_elemadd_leakyrelu_exp.gv
@@ -0,0 +1,54 @@
+digraph SAM {
+    comment="X=ss01,B=ss01,C=ss01"
+    10 [comment="type=fiberlookup,index=i,tensor=B,mode=0,format=compressed,src=true,root=true" label="FiberLookup i: B0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="i" tensor="B" mode="0" format="compressed" src="true" root="true"]
+    9 [comment="type=union,index=i" label="union i" color=purple shape=box style=filled type="union" index="i"]
+    2 [comment="type=fiberwrite,index=i,tensor=X,mode=0,format=compressed,segsize=2,crdsize=B0_dim,sink=true" label="FiberWrite i: X0\ncompressed" color=green3 shape=box style=filled type="fiberwrite" index="i" tensor="X" mode="0" format="compressed" segsize="2" crdsize="B0_dim" sink="true"]
+    7 [comment="type=fiberlookup,index=j,tensor=B,mode=1,format=compressed,src=true,root=false" label="FiberLookup j: B1\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="B" mode="1" format="compressed" src="true" root="false"]
+    6 [comment="type=union,index=j" label="union j" color=purple shape=box style=filled type="union" index="j"]
+    1 [comment="type=fiberwrite,index=j,tensor=X,mode=1,format=compressed,segsize=B0_dim+1,crdsize=B0_dim*B1_dim,sink=true" label="FiberWrite j: X1\ncompressed" color=green3 shape=box style=filled type="fiberwrite" index="j" tensor="X" mode="1" format="compressed" segsize="B0_dim+1" crdsize="B0_dim*B1_dim" sink="true"]
+    4 [comment="type=arrayvals,tensor=B" label="Array Vals: B" color=green2 shape=box style=filled type="arrayvals" tensor="B"]
+    3 [comment="type=fp_add" label="FP_Add" color=brown shape=box style=filled type="fp_add"]
+    12 [comment="broadcast" shape=point style=invis type="broadcast"]
+    13 [comment="type=fp_mul,rb_const=0.2" label="FP_Mul * 0.2" color=brown shape=box style=filled type="fp_mul" rb_const="0.2"]
+    14 [comment="type=fp_max" label="FP_Max" color=brown shape=box style=filled type="fp_max"]
+    15 [comment="type=fp_mul,rb_const=1.44269504089" label="FP_Mul * 1.44269504089" color=brown shape=box style=filled type="fp_mul" rb_const="1.44269504089"]
+    16 [comment="type=broadcast" shape=point style=invis type="broadcast"]
+    17 [comment="type=fgetfint" label="Fgetfint" color=brown shape=box style=filled type="fgetfint"]
+    18 [comment="type=fgetffrac" label="Fgetffrac" color=brown shape=box style=filled type="fgetffrac"]
+    19 [comment="type=and,rb_const=255" label="And 0x00FF" color=brown shape=box style=filled type="and" rb_const="255"]
+    20 [comment="type=faddiexp" label="Faddiexp" color=brown shape=box style=filled type="faddiexp"]
+    21 [comment="type=arrayvals,tensor=exp" label="Array Vals: exp" color=green2 shape=box style=filled type="arrayvals" tensor="exp"]
+    0 [comment="type=fiberwrite,mode=vals,tensor=X,size=1*B0_dim*B1_dim,sink=true" label="FiberWrite Vals: X" color=green3 shape=box style=filled type="fiberwrite" tensor="X" mode="vals" size="1*B0_dim*B1_dim" sink="true"]
+    5 [comment="type=arrayvals,tensor=C" label="Array Vals: C" color=green2 shape=box style=filled type="arrayvals" tensor="C"]
+    8 [comment="type=fiberlookup,index=j,tensor=C,mode=1,format=compressed,src=true,root=false" label="FiberLookup j: C1\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="C" mode="1" format="compressed" src="true" root="false"]
+    11 [comment="type=fiberlookup,index=i,tensor=C,mode=0,format=compressed,src=true,root=true" label="FiberLookup i: C0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="i" tensor="C" mode="0" format="compressed" src="true" root="true"]
+    10 -> 9 [label="crd_in-B" style=dashed type="crd" comment="in-B"]
+    9 -> 2 [label="crd" style=dashed type="crd"]
+    9 -> 7 [label="ref_out-B" style=bold type="ref" comment="out-B"]
+    7 -> 6 [label="crd_in-B" style=dashed type="crd" comment="in-B"]
+    6 -> 1 [label="crd" style=dashed type="crd"]
+    6 -> 4 [label="ref_out-B" style=bold type="ref" comment="out-B"]
+    4 -> 3 [label="val" type="val"]
+    3 -> 12 [label="val" type="val"]
+    12 -> 13 [label="val" type="val"]
+    12 -> 14 [label="val" type="val"]
+    13 -> 14 [label="val" type="val"]
+    14 -> 15 [label="val" type="val"]
+    15 -> 16 [label="val" type="val"]
+    16 -> 17 [label="val" type="val"]
+    16 -> 18 [label="val" type="val"]
+    18 -> 19 [label="val" type="val"]
+    19 -> 21 [label="ref" style=bold type="ref"]
+    21 -> 20 [label="val" type="val" comment="fp"]
+    17 -> 20 [label="val" type="val" comment="exp"]
+    20 -> 0 [label="val" type="val"]
+    6 -> 5 [label="ref_out-C" style=bold type="ref" comment="out-C"]
+    5 -> 3 [label="val" type="val"]
+    7 -> 6 [label="ref_in-B" style=bold type="ref" comment="in-B"]
+    9 -> 8 [label="ref_out-C" style=bold type="ref" comment="out-C"]
+    8 -> 6 [label="crd_in-C" style=dashed type="crd" comment="in-C"]
+    8 -> 6 [label="ref_in-C" style=bold type="ref" comment="in-C"]
+    10 -> 9 [label="ref_in-B" style=bold type="ref" comment="in-B"]
+    11 -> 9 [label="crd_in-C" style=dashed type="crd" comment="in-C"]
+    11 -> 9 [label="ref_in-C" style=bold type="ref" comment="in-C"]
+}
diff --git a/compiler/sam-outputs/onyx-dot/spmv.gv b/compiler/sam-outputs/onyx-dot/spmv.gv
new file mode 100644
index 00000000..54311ebe
--- /dev/null
+++ b/compiler/sam-outputs/onyx-dot/spmv.gv
@@ -0,0 +1,37 @@
+digraph SAM {
+    comment="x=s0,B=ss01,c=d0"
+    14 [comment="type=fiberlookup,index=i,tensor=B,mode=0,format=compressed,src=true,root=true" label="FiberLookup i: B0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="i" tensor="B" mode="0" format="compressed" src="true" root="true"]
+    13 [comment="type=broadcast" shape=point style=invis type="broadcast"]
+    7 [comment="type=crddrop,outer=i,inner=j" label="CrdDrop i,j" color=orange shape=box style=filled type="crddrop" outer="i" inner="j"]
+    1 [comment="type=fiberwrite,index=i,tensor=x,mode=0,format=compressed,segsize=2,crdsize=B0_dim,sink=true" label="FiberWrite i: x0\ncompressed" color=green3 shape=box style=filled type="fiberwrite" index="i" tensor="x" mode="0" format="compressed" segsize="2" crdsize="B0_dim" sink="true"]
+    12 [comment="type=repsiggen,index=i" label="RepeatSignalGenerator i" color=cyan3 shape=box style=filled type="repsiggen" index="i"]
+    11 [comment="type=repeat,index=i,tensor=c,root=true" label="Repeat i: c" color=cyan2 shape=box style=filled type="repeat" index="i" tensor="c" root="true"]
+    10 [comment="type=fiberlookup,index=j,tensor=c,mode=0,format=dense,src=true,root=false" label="FiberLookup j: c0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="c" mode="0" format="dense" src="true" root="false"]
+    8 [comment="type=intersect,index=j" label="intersect j" color=purple shape=box style=filled type="intersect" index="j"]
+    5 [comment="type=arrayvals,tensor=B" label="Array Vals: B" color=green2 shape=box style=filled type="arrayvals" tensor="B"]
+    4 [comment="type=mul" label="Mul" color=brown shape=box style=filled type="mul"]
+    3 [comment="type=reduce" label="Reduce" color=brown shape=box style=filled type="reduce"]
+    0 [comment="type=fiberwrite,mode=vals,tensor=x,size=1*B0_dim,sink=true" label="FiberWrite Vals: x" color=green3 shape=box style=filled type="fiberwrite" tensor="x" mode="vals" size="1*B0_dim" sink="true"]
+    6 [comment="type=arrayvals,tensor=c" label="Array Vals: c" color=green2 shape=box style=filled type="arrayvals" tensor="c"]
+    9 [comment="type=fiberlookup,index=j,tensor=B,mode=1,format=compressed,src=true,root=false" label="FiberLookup j: B1\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="B" mode="1" format="compressed" src="true" root="false"]
+    14 -> 13 [label="crd" style=dashed type="crd" comment=""]
+    13 -> 7 [label="crd_i" style=dashed type="crd" comment="i"]
+    7 -> 1 [label="crd_outer-i" style=dashed type="crd" comment="outer-i"]
+    13 -> 12 [label="crd" style=dashed type="crd" comment=""]
+    12 -> 11 [label="repsig" style=dotted type="repsig"]
+    11 -> 10 [label="ref" style=bold type="ref"]
+    10 -> 8 [label="crd_in-c" style=dashed type="crd" comment="in-c"]
+    8 -> 5 [label="ref_out-B" style=bold type="ref" comment="out-B"]
+    5 -> 4 [label="val" type="val"]
+    3 -> 0 [label="val" type="val"]
+    8 -> 6 [label="ref_out-c" style=bold type="ref" comment="out-c"]
+    6 -> 4 [label="val" type="val"]
+    10 -> 8 [label="ref_in-c" style=bold type="ref" comment="in-c"]
+    14 -> 9 [label="ref" style=bold type="ref" comment=""]
+    9 -> 8 [label="crd_in-B" style=dashed type="crd" comment="in-B"]
+    9 -> 8 [label="ref_in-B" style=bold type="ref" comment="in-B"]
+
+    8 -> 7 [label="crd_in-j" style=dashed type="crd" comment="in-j"]
+    4 -> 3 [label="val" type="val"]
+
+}
diff --git a/compiler/sam-outputs/onyx-dot/spmv_relu.gv b/compiler/sam-outputs/onyx-dot/spmv_relu.gv
new file mode 100644
index 00000000..55d2bfe1
--- /dev/null
+++ b/compiler/sam-outputs/onyx-dot/spmv_relu.gv
@@ -0,0 +1,47 @@
+digraph SAM {
+    comment="x=s0,B=ss01,c=d0"
+    14 [comment="type=fiberlookup,index=i,tensor=B,mode=0,format=compressed,src=true,root=true" label="FiberLookup i: B0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="i" tensor="B" mode="0" format="compressed" src="true" root="true"]
+    13 [comment="type=broadcast" shape=point style=invis type="broadcast"]
+    12 [comment="type=repsiggen,index=i" label="RepeatSignalGenerator i" color=cyan3 shape=box style=filled type="repsiggen" index="i"]
+    11 [comment="type=repeat,index=i,tensor=c,root=true" label="Repeat i: c" color=cyan2 shape=box style=filled type="repeat" index="i" tensor="c" root="true"]
+    10 [comment="type=fiberlookup,index=j,tensor=c,mode=0,format=dense,src=true,root=false" label="FiberLookup j: c0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="c" mode="0" format="dense" src="true" root="false"]
+    8 [comment="type=intersect,index=j" label="intersect j" color=purple shape=box style=filled type="intersect" index="j"]
+    5 [comment="type=arrayvals,tensor=B" label="Array Vals: B" color=green2 shape=box style=filled type="arrayvals" tensor="B"]
+    4 [comment="type=mul" label="Mul" color=brown shape=box style=filled type="mul"]
+    3 [comment="type=reduce" label="Reduce" color=brown shape=box style=filled type="reduce"]
+    6 [comment="type=arrayvals,tensor=c" label="Array Vals: c" color=green2 shape=box style=filled type="arrayvals" tensor="c"]
+    9 [comment="type=fiberlookup,index=j,tensor=B,mode=1,format=compressed,src=true,root=false" label="FiberLookup j: B1\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="B" mode="1" format="compressed" src="true" root="false"]
+
+
+    20 [comment="type=max" label="Max 0" color=brown shape=box style=filled type="max"]
+    0 [comment="type=fiberwrite,mode=vals,tensor=x,size=1*B0_dim,sink=true" label="FiberWrite Vals: x" color=green3 shape=box style=filled type="fiberwrite" tensor="x" mode="vals" size="1*B0_dim" sink="true"]
+    21 [comment="type=crddrop,outer=i,inner=val,mode=0" label="CrdDrop Compression i, val" color=orange style=filled type="crddrop" outer="i" inner="val" mode="0"]
+    2 [comment="type=fiberwrite,index=i,tensor=x,mode=0,format=compressed,segsize=2,crdsize=B0_dim,sink=true" label="FiberWrite i: x0\ncompressed" color=green3 shape=box style=filled type="fiberwrite" index="i" tensor="x" mode="0" format="compressed" segsize="2" crdsize="B0_dim" sink="true"]
+
+
+
+    14 -> 13 [label="crd" style=dashed type="crd" comment=""]
+    13 -> 12 [label="crd" style=dashed type="crd" comment=""]
+    12 -> 11 [label="repsig" style=dotted type="repsig"]
+    11 -> 10 [label="ref" style=bold type="ref"]
+    10 -> 8 [label="crd_in-c" style=dashed type="crd" comment="in-c"]
+    8 -> 5 [label="ref_out-B" style=bold type="ref" comment="out-B"]
+    5 -> 4 [label="val" type="val"]
+    8 -> 6 [label="ref_out-c" style=bold type="ref" comment="out-c"]
+    6 -> 4 [label="val" type="val"]
+    10 -> 8 [label="ref_in-c" style=bold type="ref" comment="in-c"]
+    14 -> 9 [label="ref" style=bold type="ref" comment=""]
+    9 -> 8 [label="crd_in-B" style=dashed type="crd" comment="in-B"]
+    9 -> 8 [label="ref_in-B" style=bold type="ref" comment="in-B"]
+
+
+    4 -> 3 [label="val" type="val"]
+
+
+    3 -> 20 [label="val" type="val" comment="val"]
+    20 -> 21 [label="val" type="val" comment="inner-val"]
+    13 -> 21 [label="crd_i" style=dashed type="crd" comment="i"]
+    21 -> 0 [label="val" type="val", comment="val"]
+    21 -> 2 [label="crd_outer-i" style=dashed type="crd" comment="outer-i"]
+
+}
diff --git a/requirements.txt b/requirements.txt
index 8671a953..5a64e46f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,4 +27,4 @@ scipy==1.10.0
 six==1.16.0
 sparse==0.13.0
 tomli==2.0.1
-tqdm==4.64.1
+tqdm==4.64.1
\ No newline at end of file
diff --git a/sam/onyx/fiber_tree.py b/sam/onyx/fiber_tree.py
index 9736f0d2..66525311 100644
--- a/sam/onyx/fiber_tree.py
+++ b/sam/onyx/fiber_tree.py
@@ -38,7 +38,6 @@ def get_root(self):
         return self.root_fiber
 
     def populate_fiber(self, fiber, sub_tensor):
-
        # Last level detection
        if len(sub_tensor.shape) == 1:
            # Finally have just a row, this is the base case...(could be a scalar)
diff --git a/sam/onyx/generate_matrices.py b/sam/onyx/generate_matrices.py
index 016d94a8..83c7443f 100644
--- a/sam/onyx/generate_matrices.py
+++ b/sam/onyx/generate_matrices.py
@@ -11,11 +11,12 @@
 import csv
 import os
 from sam.sim.test.test import *
+from lassen.utils import bfbin2float, float2bfbin
 
 
 class MatrixGenerator:
     def __init__(self, name='B', shape=None, sparsity=0.6, format='CSF', dump_dir=None,
-                 tensor=None, value_cap=None, clean=True) -> None:
+                 tensor=None, value_cap=None, clean=True, use_fp=False) -> None:
 
         # assert dimension is not None
         # self.dimension = dimension
@@ -24,6 +25,7 @@ def __init__(self, name='B', shape=None, sparsity=0.6, format='CSF', dump_dir=No
         self.sparsity = sparsity
         self.format = format
         self.name = name
+        self.use_fp = use_fp
         if value_cap is None:
             self.value_cap = int(math.pow(2, 8)) - 1
         else:
@@ -43,8 +45,16 @@ def __init__(self, name='B', shape=None, sparsity=0.6, format='CSF', dump_dir=No
             self.dump_dir = tempfile.gettempdir()
 
         if tensor is not None:
-            self.array = tensor
-            self.shape = self.array.shape
+            if tensor.dtype != numpy.float32:
+                self.array = tensor
+                self.shape = self.array.shape
+            else:
+                self.array = tensor
+                for idx, x in numpy.ndenumerate(self.array):
+                    if x == 0.0:
+                        continue
+                    self.array[idx] = bfbin2float(float2bfbin(x))
+                self.shape = self.array.shape
         else:
             assert shape is not None
             self._create_matrix(value_cap=self.value_cap)
@@ -54,7 +64,18 @@ def _create_matrix(self, value_cap=int(math.pow(2, 8)) - 1):
         '''
         Routine to create the actual matrix from the dimension/shape
         '''
-        self.array = numpy.random.randint(low=-1 * value_cap / 2, high=value_cap / 2, size=self.shape)
+        self.array = numpy.random.uniform(low=-1 * value_cap / 2, high=value_cap / 2, size=self.shape)
+        # convert to float32 for ease of conversion to bfloat16
+        self.array = self.array.astype(numpy.float32)
+        if not self.use_fp:
+            self.array = self.array.astype(int)
+        else:
+            # convert to bfloat16 by truncating the trailing fraction bits,
+            # then convert the result back to a floating-point number
+            for idx, x in numpy.ndenumerate(self.array):
+                bfval = bfbin2float(float2bfbin(x))
+                self.array[idx] = bfval
+            assert self.array.dtype == numpy.float32
         for idx, x in numpy.ndenumerate(self.array):
             if random.random() < self.sparsity:
                 self.array[idx] = 0
@@ -102,19 +123,19 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
             if glb_override:
                 lines = [len(fake_lines_seg), *fake_lines_seg, len(fake_lines_crd), *fake_lines_crd]
                 self.write_array(lines, name=f"tensor_{self.name}_mode_{mode}{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
             else:
                 self.write_array(fake_lines_seg, name=f"tensor_{self.name}_mode_{mode}_seg{suffix}",
-                                 dump_dir=use_dir, hex=print_hex)
+                                 dump_dir=use_dir, dump_hex=print_hex)
                 self.write_array(fake_lines_crd, name=f"tensor_{self.name}_mode_{mode}_crd{suffix}",
-                                 dump_dir=use_dir, hex=print_hex)
+                                 dump_dir=use_dir, dump_hex=print_hex)
             if glb_override:
                 lines = [len(fake_lines_val), *fake_lines_val]
                 self.write_array(fake_lines_val, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
             else:
                 self.write_array(fake_lines_val, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
 
             return
@@ -124,12 +145,12 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
             seg_arr, coord_arr = self._dump_csf(tmp_lvl_list)
             if glb_override:
                 lines = [len(seg_arr), *seg_arr, len(coord_arr), *coord_arr]
-                self.write_array(lines, name=f"tensor_{self.name}_mode_0{suffix}", dump_dir=use_dir, hex=print_hex)
+                self.write_array(lines, name=f"tensor_{self.name}_mode_0{suffix}", dump_dir=use_dir, dump_hex=print_hex)
             else:
                 self.write_array(seg_arr, name=f"tensor_{self.name}_mode_0_seg{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
                 self.write_array(coord_arr, name=f"tensor_{self.name}_mode_0_crd{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
 
             at_vals = False
             i = 1
@@ -152,21 +173,21 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
                         lines = [len(tmp_lvl_list), *tmp_lvl_list]
                         # self.write_array(tmp_lvl_list, name=f"tensor_{self.name}_mode_vals" dump_dir=use_dir)
                         self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex, is_val=True)
                     else:
                         self.write_array(tmp_lvl_list, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex, is_val=True)
                 else:
                     seg_arr, coord_arr = self._dump_csf(tmp_lvl_list)
                     if glb_override:
                         lines = [len(seg_arr), *seg_arr, len(coord_arr), *coord_arr]
                         self.write_array(lines, name=f"tensor_{self.name}_mode_{i}{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex)
                     else:
                         self.write_array(seg_arr, name=f"tensor_{self.name}_mode_{i}_seg{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex)
                         self.write_array(coord_arr, name=f"tensor_{self.name}_mode_{i}_crd{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex)
                 i = i + 1
         elif self.format == "UNC":
             flat_array = []
@@ -174,10 +195,10 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
                 flat_array.append(val)
             if glb_override:
                 lines = [len(flat_array), *flat_array]
-                self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir, hex=print_hex)
+                self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir, dump_hex=print_hex)
             else:
                 self.write_array(flat_array, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
         elif self.format == "COO":
             crd_dict = dict()
             order = len(self.array.shape)
@@ -195,24 +216,24 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
                 if glb_override:
                     lines = [len(crd_dict[key]), *crd_dict[key]]
                     self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                     hex=print_hex)
+                                     dump_hex=print_hex)
                 else:
                     self.write_array(crd_dict[key], name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                     hex=print_hex)
+                                     dump_hex=print_hex)
             else:
                 if glb_override:
                     lines = [len(crd_dict[key]), *crd_dict[key]]
                     self.write_array(lines, name=f"tensor_{self.name}_mode_{key}_crd{suffix}", dump_dir=use_dir,
-                                     hex=print_hex)
+                                     dump_hex=print_hex)
                 else:
                     self.write_array(crd_dict[key], name=f"tensor_{self.name}_mode_{key}_crd{suffix}",
                                      dump_dir=use_dir,
-                                     hex=print_hex)
+                                     dump_hex=print_hex)
 
         if dump_shape:
             self.write_array(self.array.shape, name=f"tensor_{self.name}_mode_shape{suffix}", dump_dir=use_dir,
-                             hex=print_hex)
+                             dump_hex=print_hex)
 
         # Transpose it back
         if tpose is True:
@@ -241,7 +262,7 @@ def _dump_csf(self, level_list):
 
         return seg_arr, coord_arr
 
-    def write_array(self, str_list, name, dump_dir=None, hex=False):
+    def write_array(self, str_list, name, dump_dir=None, dump_hex=False, is_val=False):
         """Write an array/list to a file
 
         Args:
@@ -254,11 +275,18 @@ def write_array(self, str_list, name, dump_dir=None, hex=False):
         full_path = dump_dir + "/" + name
         with open(full_path, "w+") as wr_file:
             for item in str_list:
-                item_int = int(item)
-                if hex:
-                    wr_file.write(f"{item_int:04X}\n")
+                data = item
+                if not is_val:
+                    data = int(item)
+                if dump_hex:
+                    if not isinstance(data, numpy.float32):
+                        wr_file.write(f"{data:04X}\n")
+                    else:
+                        # convert the result to its bfloat16 hexadecimal representation
+                        data = hex(int(float2bfbin(data), 2))[2:].zfill(4)
+                        wr_file.write(f"{data}\n")
                 else:
-                    wr_file.write(f"{item_int}\n")
+                    wr_file.write(f"{data}\n")
 
     def get_shape(self):
         return self.shape
@@ -427,7 +455,7 @@ def run_statistics(name, seed, shape, dump_dir, sparsity):
 
     return (avg1, avg2)
 
-def create_matrix_from_point_list(name, pt_list, shape) -> MatrixGenerator:
+def create_matrix_from_point_list(name, pt_list, shape, use_fp=False) -> MatrixGenerator:
     mat_base = numpy.zeros(shape)
     dims = len(shape)
     for pt_idx in range(len(pt_list[0])):
@@ -436,6 +464,19 @@ def create_matrix_from_point_list(name, pt_list, shape) -> MatrixGenerator:
             pt_base.append(pt_list[i][pt_idx])
         mat_base[tuple(pt_base)] = pt_list[dims][pt_idx]
 
+    # Convert the input matrix to a MatrixGenerator according to the specified use_fp
+    if use_fp:
+        mat_base = mat_base.astype(numpy.float32)
+        for idx, x in numpy.ndenumerate(mat_base):
+            if x == 0.0:
+                # don't need to truncate if it is already a zero
+                continue
+            # Convert the input from int to bfloat16
+            tmp_x = bin(int(x))[2:].zfill(16)
+            mat_base[idx] = bfbin2float(tmp_x)
+    else:
+        mat_base = mat_base.astype(numpy.uint16, casting='unsafe')
+
     mg = MatrixGenerator(name=f"{name}", shape=shape, sparsity=0.7, format='CSF', dump_dir=None, tensor=mat_base)
@@ -480,7 +521,7 @@ def convert_aha_glb_output_file(glbfile, output_dir, tiles):
 
 def get_tensor_from_files(name, files_dir, shape, base=10,
                           format='CSF', early_terminate=None, tensor_ordering=None,
-                          suffix="", positive_only=True) -> MatrixGenerator:
+                          suffix="", positive_only=True, use_fp=False) -> MatrixGenerator:
     all_files = os.listdir(files_dir)
     dims = len(shape)
@@ -522,7 +563,7 @@ def get_tensor_from_files(name, files_dir, shape, base=10,
                 segs.append(seg_t_)
                 # Empty matrix...
                if len(seg_t_) == 2 and seg_t_[0] == 0 and seg_t_[1] == 0:
-                    mg = MatrixGenerator(name=name, shape=shape, sparsity=1.0)
+                    mg = MatrixGenerator(name=name, shape=shape, sparsity=1.0, use_fp=use_fp)
                     created_empty = True
                     break
             crd_t_ = read_inputs(f"{files_dir}/{crd_f}", intype=int, base=base, early_terminate=early_terminate,
@@ -530,7 +571,7 @@ def get_tensor_from_files(name, files_dir, shape, base=10,
             crds.append(crd_t_)
     if not created_empty:
         pt_list = get_point_list(crds, segs, val_arr=vals)
-        mg = create_matrix_from_point_list(name, pt_list, shape_reordered)
+        mg = create_matrix_from_point_list(name, pt_list, shape_reordered, use_fp=use_fp)
     elif format == 'COO':
         crds = []
         for mode in range(dims):
diff --git a/sam/onyx/hw_nodes/compute_node.py b/sam/onyx/hw_nodes/compute_node.py
index 96e492bb..9c6af1e3 100644
--- a/sam/onyx/hw_nodes/compute_node.py
+++ b/sam/onyx/hw_nodes/compute_node.py
@@ -1,4 +1,5 @@
 from sam.onyx.hw_nodes.hw_node import *
+from lassen.utils import float2bfbin
 
 
 class ComputeNode(HWNode):
@@ -8,7 +9,6 @@ def __init__(self, name=None, op=None) -> None:
         self.num_outputs = 1
         self.num_inputs_connected = 0
         self.num_outputs_connected = 0
-
         self.op = op
 
     def connect(self, other, edge, kwargs=None):
@@ -119,10 +119,16 @@ def connect(self, other, edge, kwargs=None):
             other_conn = other.get_num_inputs()
             pe = self.get_name()
             # TODO: remove hack eventually
-            if 'Max' in other.op:
+            if 'Max 0' in other.op:
                 other_conn = 1
-            else:
-                other_conn = other.get_num_inputs()
+            elif 'Faddiexp' in other.op:
+                comment = edge.get_attributes()["comment"].strip('"')
+                if 'fp' in comment:
+                    other_conn = 0
+                elif 'exp' in comment:
+                    other_conn = 1
+                else:
+                    assert 0, "an edge connected to faddiexp must have its comment set to either 'exp' or 'fp'"
             new_conns = {
                 f'pe_to_pe_{other_conn}': [
                     ([(pe, "res"), (other_pe, f"data{other_conn}")], 17),
@@ -176,10 +182,38 @@ def configure(self, attributes):
             op_code = 2
         elif c_op == 'max':
             op_code = 4
+        elif c_op == 'and':
+            op_code = 5
+        elif c_op == 'fp_mul':
+            op_code = 6
+        elif c_op == 'fgetfint':
+            op_code = 7
+        elif c_op == 'fgetffrac':
+            op_code = 8
+        elif c_op == 'faddiexp':
+            op_code = 9
+        elif c_op == 'fp_max':
+            op_code = 10
+        elif c_op == 'fp_add':
+            op_code = 11
+
+        rb_const = None
+        if "rb_const" in attributes:
+            # the b operand of the op is a constant
+            rb_const = attributes["rb_const"].strip('"')
+            if "." in rb_const:
+                # the constant is a floating-point number
+                rb_const = float(rb_const)
+                rb_const = int(float2bfbin(rb_const), 2)
+            else:
+                # it is an int
+                rb_const = int(rb_const)
+
         cfg_kwargs = {
             'op': op_code,
             'use_dense': use_dense,
             'pe_only': pe_only,
-            'pe_in_external': pe_in_external
+            'pe_in_external': pe_in_external,
+            'rb_const': rb_const
         }
-        return (op_code, use_dense, pe_only, pe_in_external), cfg_kwargs
+        return (op_code, use_dense, pe_only, pe_in_external, rb_const), cfg_kwargs
diff --git a/sam/onyx/hw_nodes/read_scanner_node.py b/sam/onyx/hw_nodes/read_scanner_node.py
index 9c1d0d54..02acf89b 100644
--- a/sam/onyx/hw_nodes/read_scanner_node.py
+++ b/sam/onyx/hw_nodes/read_scanner_node.py
@@ -204,7 +204,15 @@ def connect(self, other, edge, kwargs=None):
             # Can use dynamic information to assign inputs to compute nodes
             # since add/mul are commutative
             compute_conn = other.get_num_inputs()
-
+            # TODO: get rid of this hack
+            if 'Faddiexp' in other.op:
+                comment = edge.get_attributes()["comment"].strip('"')
+                if 'fp' in comment:
+                    compute_conn = 0
+                elif 'exp' in comment:
+                    compute_conn = 1
+                else:
+                    assert 0, "an edge connected to faddiexp must have its comment set to either 'exp' or 'fp'"
             new_conns = {
                 f'rd_scan_to_compute_{compute_conn}': [
                     ([(rd_scan, "coord_out"), (compute, f"data{compute_conn}")], 17),
diff --git a/sam/onyx/hw_nodes/reduce_node.py b/sam/onyx/hw_nodes/reduce_node.py
index 2c904309..b19cf5e2 100644
--- a/sam/onyx/hw_nodes/reduce_node.py
+++ b/sam/onyx/hw_nodes/reduce_node.py
@@ -68,7 +68,7 @@ def connect(self, other, edge, kwargs=None):
             raise NotImplementedError(f'Cannot connect ReduceNode to {other_type}')
         elif other_type == ComputeNode:
             pe = other.get_name()
-            if 'Max' in other.op:
+            if 'Max 0' in other.op:
                 other_conn = 1
             else:
                 other_conn = other.get_num_inputs()
diff --git a/sam/onyx/parse_dot.py b/sam/onyx/parse_dot.py
index a952b423..fa18a557 100644
--- a/sam/onyx/parse_dot.py
+++ b/sam/onyx/parse_dot.py
@@ -99,13 +99,15 @@ def map_nodes(self):
                     hw_nt = f"HWNodeType.RepSigGen"
                 elif n_type == "repeat":
                     hw_nt = f"HWNodeType.Repeat"
-                elif n_type == "mul" or n_type == "add" or n_type == "max":
+                elif n_type == "mul" or n_type == "add" or n_type == "max" or n_type == "and":
+                    hw_nt = f"HWNodeType.Compute"
+                elif n_type == "fgetfint" or n_type == "fgetffrac" or n_type == "faddiexp":
+                    hw_nt = f"HWNodeType.Compute"
+                elif n_type == "fp_mul" or n_type == "fp_max" or n_type == "fp_add":
                     hw_nt = f"HWNodeType.Compute"
                 elif n_type == "reduce":
                     hw_nt = f"HWNodeType.Reduce"
                 elif n_type == "intersect" or n_type == "union":
-                    if n_type == "union":
-                        print("UNION BLOCK")
                     hw_nt = f"HWNodeType.Intersect"
                 elif n_type == "crddrop":
                     hw_nt = f"HWNodeType.Merge"
@@ -1016,7 +1018,7 @@ def duplicate_graph(self, tensor, factor, output='x'):
     def annotate_IO_nodes(self):
         original_nodes = self.graph.get_nodes()
         output_nodes = ['x', 'X']
-        input_nodes = ['c', 'C', 'b', 'B', 'd', 'D', 'e', 'E', 'f', 'F']
+        input_nodes = ['c', 'C', 'b', 'B', 'd', 'D', 'e', 'E', 'f', 'F', 'exp']
        exclude_nodes = ['b', 'B']
        for node in original_nodes:
            node_attrs = node.get_attributes()
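Note on the bfloat16 handling above: generate_matrices.py and compute_node.py push every value through float2bfbin/bfbin2float from lassen.utils so that generated test data and PE constants match 16-bit bfloat16 arithmetic. A minimal stand-in sketch of that round trip, assuming (as the diff's comments suggest) that float2bfbin returns the 16-bit binary string obtained by truncating a float32 to its top 16 bits; the real lassen helpers may round rather than truncate:

import struct

def float2bfbin(x):
    # hypothetical stand-in: float32 bit pattern, keep the upper 16 bits,
    # return them as a 16-character binary string
    bits = struct.unpack('>I', struct.pack('>f', float(x)))[0]
    return format(bits >> 16, '016b')

def bfbin2float(b):
    # hypothetical stand-in: re-extend to float32 by padding 16 zero mantissa bits
    return struct.unpack('>f', struct.pack('>I', int(b, 2) << 16))[0]

# Round-tripping truncates a value to bfloat16 precision, as MatrixGenerator does:
assert bfbin2float(float2bfbin(0.2)) != 0.2   # 0.2 is not exactly representable
rb_const = int(float2bfbin(0.2), 2)           # 16-bit constant, as configure() encodes it
print(f"{rb_const:04X}")                      # bfloat16 bits of 0.2 (0x3E4C under truncation)

This is also why the tensor-ingest paths only skip zeros: truncation is lossy for almost every other value, so writing the truncated value back keeps the software golden model bit-identical with the hardware.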
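For reference, the new mat_elemadd_leakyrelu_exp.gv graph chains PEs to approximate e^v after a leaky ReLU: FP_Mul by 1.44269504089 rescales by log2(e), Fgetfint/Fgetffrac split the product t = v*log2(e) into an integer part n and a fraction f, And 0x00FF keeps 8 fraction bits as an index into the "exp" values array, and Faddiexp adds n to the exponent field of the looked-up value, i.e. e^v = 2^(n+f) = 2^f * 2^n. A Python sketch of that dataflow; the 2^f table contents and the 8-bit fraction width are my reading of the graph, not something the diff spells out:

import math

LOG2_E = 1.44269504089  # the rb_const on node 15

# hypothetical 256-entry table assumed to hold 2**f for the 8-bit fraction f in [0, 1)
exp_table = [2.0 ** (k / 256.0) for k in range(256)]

def exp_approx(v):
    t = v * LOG2_E                  # FP_Mul * 1.44269504089: e**v == 2**t
    n = math.floor(t)               # Fgetfint: integer part of t
    k = int((t - n) * 256) & 0xFF   # Fgetffrac + And 0x00FF: 8 fraction bits
    frac_pow = exp_table[k]         # Array Vals: exp (table lookup)
    return math.ldexp(frac_pow, n)  # Faddiexp: add n to the exponent field

def leakyrelu_exp(v):
    # leaky ReLU (FP_Mul * 0.2 feeding FP_Max), then the exp approximation
    return exp_approx(max(v, 0.2 * v))

print(leakyrelu_exp(1.0), math.e)   # ~2.716 vs 2.71828...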