diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index 055a945b..563cad6c 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -25,6 +25,10 @@ jobs:
         python -m virtualenv venv
         source venv/bin/activate
         pip install -r requirements.txt
+        git clone https://github.com/cdonovick/peak.git
+        pip install -e peak
+        git clone https://github.com/StanfordAHA/lassen.git
+        pip install -e lassen
         pip install -e .
         echo $VIRTUAL_ENV/bin >> $GITHUB_PATH
     - name: Test all (non SuiteSparse and Frostt) tests with pytest
@@ -44,12 +48,12 @@ jobs:
       run: |
         conda install flake8
         # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude venv
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude venv,peak,lassen
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude venv
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude venv,peak,lassen
     - name: Python style checking flake8
       run: |
-        flake8 . --count --select=E,W --statistics --ignore=W503,W504 --max-line-length=127 --exclude venv
+        flake8 . --count --select=E,W --statistics --ignore=W503,W504 --max-line-length=127 --exclude venv,peak,lassen
     - name: Check SAM Simulator generating script
       run: |
        make tests
diff --git a/compiler/sam-outputs/onyx-dot/mat_elemadd_leakyrelu_exp.gv b/compiler/sam-outputs/onyx-dot/mat_elemadd_leakyrelu_exp.gv
new file mode 100644
index 00000000..aaad0587
--- /dev/null
+++ b/compiler/sam-outputs/onyx-dot/mat_elemadd_leakyrelu_exp.gv
@@ -0,0 +1,54 @@
+digraph SAM {
+    comment="X=ss01,B=ss01,C=ss01"
+    10 [comment="type=fiberlookup,index=i,tensor=B,mode=0,format=compressed,src=true,root=true" label="FiberLookup i: B0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="i" tensor="B" mode="0" format="compressed" src="true" root="true"]
+    9 [comment="type=union,index=i" label="union i" color=purple shape=box style=filled type="union" index="i"]
+    2 [comment="type=fiberwrite,index=i,tensor=X,mode=0,format=compressed,segsize=2,crdsize=B0_dim,sink=true" label="FiberWrite i: X0\ncompressed" color=green3 shape=box style=filled type="fiberwrite" index="i" tensor="X" mode="0" format="compressed" segsize="2" crdsize="B0_dim" sink="true"]
+    7 [comment="type=fiberlookup,index=j,tensor=B,mode=1,format=compressed,src=true,root=false" label="FiberLookup j: B1\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="B" mode="1" format="compressed" src="true" root="false"]
+    6 [comment="type=union,index=j" label="union j" color=purple shape=box style=filled type="union" index="j"]
+    1 [comment="type=fiberwrite,index=j,tensor=X,mode=1,format=compressed,segsize=B0_dim+1,crdsize=B0_dim*B1_dim,sink=true" label="FiberWrite j: X1\ncompressed" color=green3 shape=box style=filled type="fiberwrite" index="j" tensor="X" mode="1" format="compressed" segsize="B0_dim+1" crdsize="B0_dim*B1_dim" sink="true"]
+    4 [comment="type=arrayvals,tensor=B" label="Array Vals: B" color=green2 shape=box style=filled type="arrayvals" tensor="B"]
+    3 [comment="type=fp_add" label="FP_Add" color=brown shape=box style=filled type="fp_add"]
+    12 [comment="broadcast" shape=point style=invis type="broadcast"]
+    13 [comment="type=fp_mul,rb_const=0.2" label="FP_Mul * 0.2" color=brown shape=box style=filled type="fp_mul" rb_const="0.2"]
+    14 [comment="type=fp_max" label="FP_Max" color=brown shape=box style=filled type="fp_max"]
+    15 [comment="type=fp_mul,rb_const=1.44269504089" label="FP_Mul * 1.44269504089" color=brown shape=box style=filled type="fp_mul" rb_const="1.44269504089"]
+    16 [comment="type=broadcast" shape=point style=invis type="broadcast"]
+    17 [comment="type=fgetfint" label="Fgetfint" color=brown shape=box style=filled type="fgetfint"]
+    18 [comment="type=fgetffrac" label="Fgetffrac" color=brown shape=box style=filled type="fgetffrac"]
+    19 [comment="type=and,rb_const=255" label="And 0x00FF" color=brown shape=box style=filled type="and" rb_const="255"]
+    20 [comment="type=faddiexp" label="Faddiexp" color=brown shape=box style=filled type="faddiexp"]
+    21 [comment="type=arrayvals,tensor=exp" label="Array Vals: exp" color=green2 shape=box style=filled type="arrayvals" tensor="exp"]
+    0 [comment="type=fiberwrite,mode=vals,tensor=X,size=1*B0_dim*B1_dim,sink=true" label="FiberWrite Vals: X" color=green3 shape=box style=filled type="fiberwrite" tensor="X" mode="vals" size="1*B0_dim*B1_dim" sink="true"]
+    5 [comment="type=arrayvals,tensor=C" label="Array Vals: C" color=green2 shape=box style=filled type="arrayvals" tensor="C"]
+    8 [comment="type=fiberlookup,index=j,tensor=C,mode=1,format=compressed,src=true,root=false" label="FiberLookup j: C1\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="C" mode="1" format="compressed" src="true" root="false"]
+    11 [comment="type=fiberlookup,index=i,tensor=C,mode=0,format=compressed,src=true,root=true" label="FiberLookup i: C0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="i" tensor="C" mode="0" format="compressed" src="true" root="true"]
+    10 -> 9 [label="crd_in-B" style=dashed type="crd" comment="in-B"]
+    9 -> 2 [label="crd" style=dashed type="crd"]
+    9 -> 7 [label="ref_out-B" style=bold type="ref" comment="out-B"]
+    7 -> 6 [label="crd_in-B" style=dashed type="crd" comment="in-B"]
+    6 -> 1 [label="crd" style=dashed type="crd"]
+    6 -> 4 [label="ref_out-B" style=bold type="ref" comment="out-B"]
+    4 -> 3 [label="val" type="val"]
+    3 -> 12 [label="val" type="val"]
+    12 -> 13 [label="val" type="val"]
+    12 -> 14 [label="val" type="val"]
+    13 -> 14 [label="val" type="val"]
+    14 -> 15 [label="val" type="val"]
+    15 -> 16 [label="val" type="val"]
+    16 -> 17 [label="val" type="val"]
+    16 -> 18 [label="val" type="val"]
+    18 -> 19 [label="val" type="val"]
+    19 -> 21 [label="ref" style=bold type="ref"]
+    21 -> 20 [label="val" type="val" comment="fp"]
+    17 -> 20 [label="val" type="val" comment="exp"]
+    20 -> 0 [label="val" type="val"]
+    6 -> 5 [label="ref_out-C" style=bold type="ref" comment="out-C"]
+    5 -> 3 [label="val" type="val"]
+    7 -> 6 [label="ref_in-B" style=bold type="ref" comment="in-B"]
+    9 -> 8 [label="ref_out-C" style=bold type="ref" comment="out-C"]
+    8 -> 6 [label="crd_in-C" style=dashed type="crd" comment="in-C"]
+    8 -> 6 [label="ref_in-C" style=bold type="ref" comment="in-C"]
+    10 -> 9 [label="ref_in-B" style=bold type="ref" comment="in-B"]
+    11 -> 9 [label="crd_in-C" style=dashed type="crd" comment="in-C"]
+    11 -> 9 [label="ref_in-C" style=bold type="ref" comment="in-C"]
+}
diff --git a/compiler/sam-outputs/onyx-dot/spmv.gv b/compiler/sam-outputs/onyx-dot/spmv.gv
new file mode 100644
index 00000000..54311ebe
--- /dev/null
+++ b/compiler/sam-outputs/onyx-dot/spmv.gv
@@ -0,0 +1,37 @@
+digraph SAM {
+    comment="x=s0,B=ss01,c=d0"
+    14 [comment="type=fiberlookup,index=i,tensor=B,mode=0,format=compressed,src=true,root=true" label="FiberLookup i: B0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="i" tensor="B" mode="0" format="compressed" src="true" root="true"]
+    13 [comment="type=broadcast" shape=point style=invis type="broadcast"]
+    7 [comment="type=crddrop,outer=i,inner=j" label="CrdDrop i,j" color=orange shape=box style=filled type="crddrop" outer="i" inner="j"]
+    1 [comment="type=fiberwrite,index=i,tensor=x,mode=0,format=compressed,segsize=2,crdsize=B0_dim,sink=true" label="FiberWrite i: x0\ncompressed" color=green3 shape=box style=filled type="fiberwrite" index="i" tensor="x" mode="0" format="compressed" segsize="2" crdsize="B0_dim" sink="true"]
+    12 [comment="type=repsiggen,index=i" label="RepeatSignalGenerator i" color=cyan3 shape=box style=filled type="repsiggen" index="i"]
+    11 [comment="type=repeat,index=i,tensor=c,root=true" label="Repeat i: c" color=cyan2 shape=box style=filled type="repeat" index="i" tensor="c" root="true"]
+    10 [comment="type=fiberlookup,index=j,tensor=c,mode=0,format=dense,src=true,root=false" label="FiberLookup j: c0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="c" mode="0" format="dense" src="true" root="false"]
+    8 [comment="type=intersect,index=j" label="intersect j" color=purple shape=box style=filled type="intersect" index="j"]
+    5 [comment="type=arrayvals,tensor=B" label="Array Vals: B" color=green2 shape=box style=filled type="arrayvals" tensor="B"]
+    4 [comment="type=mul" label="Mul" color=brown shape=box style=filled type="mul"]
+    3 [comment="type=reduce" label="Reduce" color=brown shape=box style=filled type="reduce"]
+    0 [comment="type=fiberwrite,mode=vals,tensor=x,size=1*B0_dim,sink=true" label="FiberWrite Vals: x" color=green3 shape=box style=filled type="fiberwrite" tensor="x" mode="vals" size="1*B0_dim" sink="true"]
+    6 [comment="type=arrayvals,tensor=c" label="Array Vals: c" color=green2 shape=box style=filled type="arrayvals" tensor="c"]
+    9 [comment="type=fiberlookup,index=j,tensor=B,mode=1,format=compressed,src=true,root=false" label="FiberLookup j: B1\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="B" mode="1" format="compressed" src="true" root="false"]
+    14 -> 13 [label="crd" style=dashed type="crd" comment=""]
+    13 -> 7 [label="crd_i" style=dashed type="crd" comment="i"]
+    7 -> 1 [label="crd_outer-i" style=dashed type="crd" comment="outer-i"]
+    13 -> 12 [label="crd" style=dashed type="crd" comment=""]
+    12 -> 11 [label="repsig" style=dotted type="repsig"]
+    11 -> 10 [label="ref" style=bold type="ref"]
+    10 -> 8 [label="crd_in-c" style=dashed type="crd" comment="in-c"]
+    8 -> 5 [label="ref_out-B" style=bold type="ref" comment="out-B"]
+    5 -> 4 [label="val" type="val"]
+    3 -> 0 [label="val" type="val"]
+    8 -> 6 [label="ref_out-c" style=bold type="ref" comment="out-c"]
+    6 -> 4 [label="val" type="val"]
+    10 -> 8 [label="ref_in-c" style=bold type="ref" comment="in-c"]
+    14 -> 9 [label="ref" style=bold type="ref" comment=""]
+    9 -> 8 [label="crd_in-B" style=dashed type="crd" comment="in-B"]
+    9 -> 8 [label="ref_in-B" style=bold type="ref" comment="in-B"]
+
+    8 -> 7 [label="crd_in-j" style=dashed type="crd" comment="in-j"]
+    4 -> 3 [label="val" type="val"]
+
+}
diff --git a/compiler/sam-outputs/onyx-dot/spmv_relu.gv b/compiler/sam-outputs/onyx-dot/spmv_relu.gv
new file mode 100644
index 00000000..55d2bfe1
--- /dev/null
+++ b/compiler/sam-outputs/onyx-dot/spmv_relu.gv
@@ -0,0 +1,47 @@
+digraph SAM {
+    comment="x=s0,B=ss01,c=d0"
+    14 [comment="type=fiberlookup,index=i,tensor=B,mode=0,format=compressed,src=true,root=true" label="FiberLookup i: B0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="i" tensor="B" mode="0" format="compressed" src="true" root="true"]
+    13 [comment="type=broadcast" shape=point style=invis type="broadcast"]
+    12 [comment="type=repsiggen,index=i" label="RepeatSignalGenerator i" color=cyan3 shape=box style=filled type="repsiggen" index="i"]
+    11 [comment="type=repeat,index=i,tensor=c,root=true" label="Repeat i: c" color=cyan2 shape=box style=filled type="repeat" index="i" tensor="c" root="true"]
+    10 [comment="type=fiberlookup,index=j,tensor=c,mode=0,format=dense,src=true,root=false" label="FiberLookup j: c0\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="c" mode="0" format="dense" src="true" root="false"]
+    8 [comment="type=intersect,index=j" label="intersect j" color=purple shape=box style=filled type="intersect" index="j"]
+    5 [comment="type=arrayvals,tensor=B" label="Array Vals: B" color=green2 shape=box style=filled type="arrayvals" tensor="B"]
+    4 [comment="type=mul" label="Mul" color=brown shape=box style=filled type="mul"]
+    3 [comment="type=reduce" label="Reduce" color=brown shape=box style=filled type="reduce"]
+    6 [comment="type=arrayvals,tensor=c" label="Array Vals: c" color=green2 shape=box style=filled type="arrayvals" tensor="c"]
+    9 [comment="type=fiberlookup,index=j,tensor=B,mode=1,format=compressed,src=true,root=false" label="FiberLookup j: B1\ncompressed" color=green4 shape=box style=filled type="fiberlookup" index="j" tensor="B" mode="1" format="compressed" src="true" root="false"]
+
+
+    20 [comment="type=max" label="Max 0" color=brown shape=box style=filled type="max"]
+    0 [comment="type=fiberwrite,mode=vals,tensor=x,size=1*B0_dim,sink=true" label="FiberWrite Vals: x" color=green3 shape=box style=filled type="fiberwrite" tensor="x" mode="vals" size="1*B0_dim" sink="true"]
+    21 [comment="type=crddrop,outer=i,inner=val,mode=0" label="CrdDrop Compression i, val" color=orange style=filled type="crddrop" outer="i" inner="val" mode="0"]
+    2 [comment="type=fiberwrite,index=i,tensor=x,mode=0,format=compressed,segsize=2,crdsize=B0_dim,sink=true" label="FiberWrite i: x0\ncompressed" color=green3 shape=box style=filled type="fiberwrite" index="i" tensor="x" mode="0" format="compressed" segsize="2" crdsize="B0_dim" sink="true"]
+
+
+
+    14 -> 13 [label="crd" style=dashed type="crd" comment=""]
+    13 -> 12 [label="crd" style=dashed type="crd" comment=""]
+    12 -> 11 [label="repsig" style=dotted type="repsig"]
+    11 -> 10 [label="ref" style=bold type="ref"]
+    10 -> 8 [label="crd_in-c" style=dashed type="crd" comment="in-c"]
+    8 -> 5 [label="ref_out-B" style=bold type="ref" comment="out-B"]
+    5 -> 4 [label="val" type="val"]
+    8 -> 6 [label="ref_out-c" style=bold type="ref" comment="out-c"]
+    6 -> 4 [label="val" type="val"]
+    10 -> 8 [label="ref_in-c" style=bold type="ref" comment="in-c"]
+    14 -> 9 [label="ref" style=bold type="ref" comment=""]
+    9 -> 8 [label="crd_in-B" style=dashed type="crd" comment="in-B"]
+    9 -> 8 [label="ref_in-B" style=bold type="ref" comment="in-B"]
+
+
+    4 -> 3 [label="val" type="val"]
+
+
+    3 -> 20 [label="val" type="val" comment="val"]
+    20 -> 21 [label="val" type="val" comment="inner-val"]
+    13 -> 21 [label="crd_i" style=dashed type="crd" comment="i"]
+    21 -> 0 [label="val" type="val", comment="val"]
+    21 -> 2 [label="crd_outer-i" style=dashed type="crd" comment="outer-i"]
+
+}
diff --git a/requirements.txt b/requirements.txt
index 8671a953..5a64e46f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,4 +27,4 @@ scipy==1.10.0
 six==1.16.0
 sparse==0.13.0
 tomli==2.0.1
-tqdm==4.64.1
+tqdm==4.64.1
\ No newline at end of file
diff --git a/sam/onyx/fiber_tree.py b/sam/onyx/fiber_tree.py
index 9736f0d2..66525311 100644
--- a/sam/onyx/fiber_tree.py
+++ b/sam/onyx/fiber_tree.py
@@ -38,7 +38,6 @@ def get_root(self):
         return self.root_fiber
 
     def populate_fiber(self, fiber, sub_tensor):
-
        # Last level detection
        if len(sub_tensor.shape) == 1:
            # Finally have just a row, this is the base case...(could be a scalar)
diff --git a/sam/onyx/generate_matrices.py b/sam/onyx/generate_matrices.py
index 016d94a8..83c7443f 100644
--- a/sam/onyx/generate_matrices.py
+++ b/sam/onyx/generate_matrices.py
@@ -11,11 +11,12 @@
 import csv
 import os
 from sam.sim.test.test import *
+from lassen.utils import bfbin2float, float2bfbin
 
 
 class MatrixGenerator:
     def __init__(self, name='B', shape=None, sparsity=0.6, format='CSF', dump_dir=None,
-                 tensor=None, value_cap=None, clean=True) -> None:
+                 tensor=None, value_cap=None, clean=True, use_fp=False) -> None:
 
         # assert dimension is not None
         # self.dimension = dimension
@@ -24,6 +25,7 @@ def __init__(self, name='B', shape=None, sparsity=0.6, format='CSF', dump_dir=No
         self.sparsity = sparsity
         self.format = format
         self.name = name
+        self.use_fp = use_fp
         if value_cap is None:
             self.value_cap = int(math.pow(2, 8)) - 1
         else:
@@ -43,8 +45,16 @@ def __init__(self, name='B', shape=None, sparsity=0.6, format='CSF', dump_dir=No
             self.dump_dir = tempfile.gettempdir()
 
         if tensor is not None:
-            self.array = tensor
-            self.shape = self.array.shape
+            if tensor.dtype != numpy.float32:
+                self.array = tensor
+                self.shape = self.array.shape
+            else:
+                self.array = tensor
+                for idx, x in numpy.ndenumerate(self.array):
+                    if x == 0.0:
+                        continue
+                    self.array[idx] = bfbin2float(float2bfbin(x))
+                self.shape = self.array.shape
         else:
             assert shape is not None
             self._create_matrix(value_cap=self.value_cap)
@@ -54,7 +64,18 @@ def _create_matrix(self, value_cap=int(math.pow(2, 8)) - 1):
         '''
         Routine to create the actual matrix from the dimension/shape
         '''
-        self.array = numpy.random.randint(low=-1 * value_cap / 2, high=value_cap / 2, size=self.shape)
+        self.array = numpy.random.uniform(low=-1 * value_cap / 2, high=value_cap / 2, size=self.shape)
+        # convert to float32 for ease of conversion to bfloat16
+        self.array = self.array.astype(numpy.float32)
+        if not self.use_fp:
+            self.array = self.array.astype(int)
+        else:
+            # convert to bfloat16 by truncating the trailing fraction bits,
+            # then convert the result back to a floating-point number
+            for idx, x in numpy.ndenumerate(self.array):
+                bfval = bfbin2float(float2bfbin(x))
+                self.array[idx] = bfval
+            assert self.array.dtype == numpy.float32
         for idx, x in numpy.ndenumerate(self.array):
             if random.random() < self.sparsity:
                 self.array[idx] = 0
@@ -102,19 +123,19 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
             if glb_override:
                 lines = [len(fake_lines_seg), *fake_lines_seg, len(fake_lines_crd), *fake_lines_crd]
                 self.write_array(lines, name=f"tensor_{self.name}_mode_{mode}{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
             else:
                 self.write_array(fake_lines_seg, name=f"tensor_{self.name}_mode_{mode}_seg{suffix}",
-                                 dump_dir=use_dir, hex=print_hex)
+                                 dump_dir=use_dir, dump_hex=print_hex)
                 self.write_array(fake_lines_crd, name=f"tensor_{self.name}_mode_{mode}_crd{suffix}",
-                                 dump_dir=use_dir, hex=print_hex)
+                                 dump_dir=use_dir, dump_hex=print_hex)
             if glb_override:
                 lines = [len(fake_lines_val), *fake_lines_val]
                 self.write_array(fake_lines_val, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
             else:
                 self.write_array(fake_lines_val, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
 
             return
@@ -124,12 +145,12 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
             seg_arr, coord_arr = self._dump_csf(tmp_lvl_list)
             if glb_override:
                 lines = [len(seg_arr), *seg_arr, len(coord_arr), *coord_arr]
-                self.write_array(lines, name=f"tensor_{self.name}_mode_0{suffix}", dump_dir=use_dir, hex=print_hex)
+                self.write_array(lines, name=f"tensor_{self.name}_mode_0{suffix}", dump_dir=use_dir, dump_hex=print_hex)
             else:
                 self.write_array(seg_arr, name=f"tensor_{self.name}_mode_0_seg{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
                 self.write_array(coord_arr, name=f"tensor_{self.name}_mode_0_crd{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
 
             at_vals = False
             i = 1
@@ -152,21 +173,21 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
                         lines = [len(tmp_lvl_list), *tmp_lvl_list]
                         # self.write_array(tmp_lvl_list, name=f"tensor_{self.name}_mode_vals" dump_dir=use_dir)
                         self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex, is_val=True)
                     else:
                         self.write_array(tmp_lvl_list, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex, is_val=True)
                 else:
                     seg_arr, coord_arr = self._dump_csf(tmp_lvl_list)
                     if glb_override:
                         lines = [len(seg_arr), *seg_arr, len(coord_arr), *coord_arr]
                         self.write_array(lines, name=f"tensor_{self.name}_mode_{i}{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex)
                     else:
                         self.write_array(seg_arr, name=f"tensor_{self.name}_mode_{i}_seg{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex)
                         self.write_array(coord_arr, name=f"tensor_{self.name}_mode_{i}_crd{suffix}", dump_dir=use_dir,
-                                         hex=print_hex)
+                                         dump_hex=print_hex)
                 i = i + 1
         elif self.format == "UNC":
             flat_array = []
@@ -174,10 +195,10 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
                 flat_array.append(val)
             if glb_override:
                 lines = [len(flat_array), *flat_array]
-                self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir, hex=print_hex)
+                self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir, dump_hex=print_hex)
             else:
                 self.write_array(flat_array, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                 hex=print_hex)
+                                 dump_hex=print_hex)
         elif self.format == "COO":
             crd_dict = dict()
             order = len(self.array.shape)
@@ -195,24 +216,24 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
                 if glb_override:
                     lines = [len(crd_dict[key]), *crd_dict[key]]
                     self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                     hex=print_hex)
+                                     dump_hex=print_hex)
                 else:
                     self.write_array(crd_dict[key], name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
-                                     hex=print_hex)
+                                     dump_hex=print_hex)
             else:
                 if glb_override:
                     lines = [len(crd_dict[key]), *crd_dict[key]]
                     self.write_array(lines, name=f"tensor_{self.name}_mode_{key}_crd{suffix}", dump_dir=use_dir,
-                                     hex=print_hex)
+                                     dump_hex=print_hex)
                 else:
                     self.write_array(crd_dict[key], name=f"tensor_{self.name}_mode_{key}_crd{suffix}",
                                      dump_dir=use_dir,
-                                     hex=print_hex)
+                                     dump_hex=print_hex)
 
         if dump_shape:
             self.write_array(self.array.shape, name=f"tensor_{self.name}_mode_shape{suffix}", dump_dir=use_dir,
-                             hex=print_hex)
+                             dump_hex=print_hex)
 
         # Transpose it back
         if tpose is True:
@@ -241,7 +262,7 @@ def _dump_csf(self, level_list):
 
         return seg_arr, coord_arr
 
-    def write_array(self, str_list, name, dump_dir=None, hex=False):
+    def write_array(self, str_list, name, dump_dir=None, dump_hex=False, is_val=False):
         """Write an array/list to a file
 
         Args:
@@ -254,11 +275,18 @@ def write_array(self, str_list, name, dump_dir=None, hex=False):
         full_path = dump_dir + "/" + name
         with open(full_path, "w+") as wr_file:
             for item in str_list:
-                item_int = int(item)
-                if hex:
-                    wr_file.write(f"{item_int:04X}\n")
+                data = item
+                if not is_val:
+                    data = int(item)
+                if dump_hex:
+                    if not isinstance(data, numpy.float32):
+                        wr_file.write(f"{data:04X}\n")
+                    else:
+                        # convert the result to its bfloat16 hexadecimal representation
+                        data = hex(int(float2bfbin(data), 2))[2:].zfill(4)
+                        wr_file.write(f"{data}\n")
                 else:
-                    wr_file.write(f"{item_int}\n")
+                    wr_file.write(f"{data}\n")
 
     def get_shape(self):
         return self.shape
@@ -427,7 +455,7 @@ def run_statistics(name, seed, shape, dump_dir, sparsity):
 
     return (avg1, avg2)
 
-def create_matrix_from_point_list(name, pt_list, shape) -> MatrixGenerator:
+def create_matrix_from_point_list(name, pt_list, shape, use_fp=False) -> MatrixGenerator:
     mat_base = numpy.zeros(shape)
     dims = len(shape)
     for pt_idx in range(len(pt_list[0])):
@@ -436,6 +464,19 @@ def create_matrix_from_point_list(name, pt_list, shape) -> MatrixGenerator:
             pt_base.append(pt_list[i][pt_idx])
         mat_base[tuple(pt_base)] = pt_list[dims][pt_idx]
 
+    # Convert the input matrix to a MatrixGenerator according to the specified use_fp
+    if use_fp:
+        mat_base = mat_base.astype(numpy.float32)
+        for idx, x in numpy.ndenumerate(mat_base):
+            if x == 0.0:
+                # don't need to truncate if it is already a zero
+                continue
+            # Convert the input from int to bfloat16
+            tmp_x = bin(int(x))[2:].zfill(16)
+            mat_base[idx] = bfbin2float(tmp_x)
+    else:
+        mat_base = mat_base.astype(numpy.uint16, casting='unsafe')
+
     mg = MatrixGenerator(name=f"{name}", shape=shape, sparsity=0.7, format='CSF', dump_dir=None, tensor=mat_base)
@@ -480,7 +521,7 @@ def convert_aha_glb_output_file(glbfile, output_dir, tiles):
 
 def get_tensor_from_files(name, files_dir, shape, base=10,
                           format='CSF', early_terminate=None, tensor_ordering=None,
-                          suffix="", positive_only=True) -> MatrixGenerator:
+                          suffix="", positive_only=True, use_fp=False) -> MatrixGenerator:
     all_files = os.listdir(files_dir)
     dims = len(shape)
@@ -522,7 +563,7 @@ def get_tensor_from_files(name, files_dir, shape, base=10,
                 segs.append(seg_t_)
                 # Empty matrix...
                if len(seg_t_) == 2 and seg_t_[0] == 0 and seg_t_[1] == 0:
-                    mg = MatrixGenerator(name=name, shape=shape, sparsity=1.0)
+                    mg = MatrixGenerator(name=name, shape=shape, sparsity=1.0, use_fp=use_fp)
                     created_empty = True
                     break
             crd_t_ = read_inputs(f"{files_dir}/{crd_f}", intype=int, base=base, early_terminate=early_terminate,
@@ -530,7 +571,7 @@ def get_tensor_from_files(name, files_dir, shape, base=10,
             crds.append(crd_t_)
     if not created_empty:
         pt_list = get_point_list(crds, segs, val_arr=vals)
-        mg = create_matrix_from_point_list(name, pt_list, shape_reordered)
+        mg = create_matrix_from_point_list(name, pt_list, shape_reordered, use_fp=use_fp)
     elif format == 'COO':
         crds = []
         for mode in range(dims):
diff --git a/sam/onyx/hw_nodes/compute_node.py b/sam/onyx/hw_nodes/compute_node.py
index 96e492bb..9c6af1e3 100644
--- a/sam/onyx/hw_nodes/compute_node.py
+++ b/sam/onyx/hw_nodes/compute_node.py
@@ -1,4 +1,5 @@
 from sam.onyx.hw_nodes.hw_node import *
+from lassen.utils import float2bfbin
 
 
 class ComputeNode(HWNode):
@@ -8,7 +9,6 @@ def __init__(self, name=None, op=None) -> None:
         self.num_outputs = 1
         self.num_inputs_connected = 0
         self.num_outputs_connected = 0
-
         self.op = op
 
     def connect(self, other, edge, kwargs=None):
@@ -119,10 +119,16 @@ def connect(self, other, edge, kwargs=None):
             other_conn = other.get_num_inputs()
             pe = self.get_name()
             # TODO: remove hack eventually
-            if 'Max' in other.op:
+            if 'Max 0' in other.op:
                 other_conn = 1
-            else:
-                other_conn = other.get_num_inputs()
+            elif 'Faddiexp' in other.op:
+                comment = edge.get_attributes()["comment"].strip('"')
+                if 'fp' in comment:
+                    other_conn = 0
+                elif 'exp' in comment:
+                    other_conn = 1
+                else:
+                    assert 0, "an edge connected to faddiexp must have its comment set to either 'exp' or 'fp'"
             new_conns = {
                 f'pe_to_pe_{other_conn}': [
                     ([(pe, "res"), (other_pe, f"data{other_conn}")], 17),
@@ -176,10 +182,38 @@ def configure(self, attributes):
             op_code = 2
         elif c_op == 'max':
             op_code = 4
+        elif c_op == 'and':
+            op_code = 5
+        elif c_op == 'fp_mul':
+            op_code = 6
+        elif c_op == 'fgetfint':
+            op_code = 7
+        elif c_op == 'fgetffrac':
+            op_code = 8
+        elif c_op == 'faddiexp':
+            op_code = 9
+        elif c_op == 'fp_max':
+            op_code = 10
+        elif c_op == 'fp_add':
+            op_code = 11
+
+        rb_const = None
+        if "rb_const" in attributes:
+            # the b operand of the op is a constant
+            rb_const = attributes["rb_const"].strip('"')
+            if "." in rb_const:
+                # the constant is a floating-point number
+                rb_const = float(rb_const)
+                rb_const = int(float2bfbin(rb_const), 2)
+            else:
+                # it is an int
+                rb_const = int(rb_const)
+
         cfg_kwargs = {
             'op': op_code,
             'use_dense': use_dense,
             'pe_only': pe_only,
-            'pe_in_external': pe_in_external
+            'pe_in_external': pe_in_external,
+            'rb_const': rb_const
         }
-        return (op_code, use_dense, pe_only, pe_in_external), cfg_kwargs
+        return (op_code, use_dense, pe_only, pe_in_external, rb_const), cfg_kwargs
diff --git a/sam/onyx/hw_nodes/read_scanner_node.py b/sam/onyx/hw_nodes/read_scanner_node.py
index 9c1d0d54..02acf89b 100644
--- a/sam/onyx/hw_nodes/read_scanner_node.py
+++ b/sam/onyx/hw_nodes/read_scanner_node.py
@@ -204,7 +204,15 @@ def connect(self, other, edge, kwargs=None):
             # Can use dynamic information to assign inputs to compute nodes
             # since add/mul are commutative
             compute_conn = other.get_num_inputs()
-
+            # TODO: get rid of this hack
+            if 'Faddiexp' in other.op:
+                comment = edge.get_attributes()["comment"].strip('"')
+                if 'fp' in comment:
+                    compute_conn = 0
+                elif 'exp' in comment:
+                    compute_conn = 1
+                else:
+                    assert 0, "an edge connected to faddiexp must have its comment set to either 'exp' or 'fp'"
             new_conns = {
                 f'rd_scan_to_compute_{compute_conn}': [
                     ([(rd_scan, "coord_out"), (compute, f"data{compute_conn}")], 17),
diff --git a/sam/onyx/hw_nodes/reduce_node.py b/sam/onyx/hw_nodes/reduce_node.py
index 2c904309..b19cf5e2 100644
--- a/sam/onyx/hw_nodes/reduce_node.py
+++ b/sam/onyx/hw_nodes/reduce_node.py
@@ -68,7 +68,7 @@ def connect(self, other, edge, kwargs=None):
             raise NotImplementedError(f'Cannot connect ReduceNode to {other_type}')
         elif other_type == ComputeNode:
             pe = other.get_name()
-            if 'Max' in other.op:
+            if 'Max 0' in other.op:
                 other_conn = 1
             else:
                 other_conn = other.get_num_inputs()
diff --git a/sam/onyx/parse_dot.py b/sam/onyx/parse_dot.py
index a952b423..fa18a557 100644
--- a/sam/onyx/parse_dot.py
+++ b/sam/onyx/parse_dot.py
@@ -99,13 +99,15 @@ def map_nodes(self):
                     hw_nt = f"HWNodeType.RepSigGen"
                 elif n_type == "repeat":
                     hw_nt = f"HWNodeType.Repeat"
-                elif n_type == "mul" or n_type == "add" or n_type == "max":
+                elif n_type == "mul" or n_type == "add" or n_type == "max" or n_type == "and":
+                    hw_nt = f"HWNodeType.Compute"
+                elif n_type == "fgetfint" or n_type == "fgetffrac" or n_type == "faddiexp":
+                    hw_nt = f"HWNodeType.Compute"
+                elif n_type == "fp_mul" or n_type == "fp_max" or n_type == "fp_add":
                     hw_nt = f"HWNodeType.Compute"
                 elif n_type == "reduce":
                     hw_nt = f"HWNodeType.Reduce"
                 elif n_type == "intersect" or n_type == "union":
-                    if n_type == "union":
-                        print("UNION BLOCK")
                     hw_nt = f"HWNodeType.Intersect"
                 elif n_type == "crddrop":
                     hw_nt = f"HWNodeType.Merge"
@@ -1016,7 +1018,7 @@ def duplicate_graph(self, tensor, factor, output='x'):
     def annotate_IO_nodes(self):
         original_nodes = self.graph.get_nodes()
         output_nodes = ['x', 'X']
-        input_nodes = ['c', 'C', 'b', 'B', 'd', 'D', 'e', 'E', 'f', 'F']
+        input_nodes = ['c', 'C', 'b', 'B', 'd', 'D', 'e', 'E', 'f', 'F', 'exp']
        exclude_nodes = ['b', 'B']
        for node in original_nodes:
            node_attrs = node.get_attributes()
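Note on the bfloat16 handling above: generate_matrices.py and compute_node.py push every value through float2bfbin/bfbin2float from lassen.utils so that generated test data and PE constants match 16-bit bfloat16 arithmetic. A minimal stand-in sketch of that round trip, assuming (as the diff's comments suggest) that float2bfbin returns the 16-bit binary string obtained by truncating a float32 to its top 16 bits; the real lassen helpers may round rather than truncate:

import struct

def float2bfbin(x):
    # hypothetical stand-in: float32 bit pattern, keep the upper 16 bits,
    # return them as a 16-character binary string
    bits = struct.unpack('>I', struct.pack('>f', float(x)))[0]
    return format(bits >> 16, '016b')

def bfbin2float(b):
    # hypothetical stand-in: re-extend to float32 by padding 16 zero mantissa bits
    return struct.unpack('>f', struct.pack('>I', int(b, 2) << 16))[0]

# Round-tripping truncates a value to bfloat16 precision, as MatrixGenerator does:
assert bfbin2float(float2bfbin(0.2)) != 0.2   # 0.2 is not exactly representable
rb_const = int(float2bfbin(0.2), 2)           # 16-bit constant, as configure() encodes it
print(f"{rb_const:04X}")                      # bfloat16 bits of 0.2 (0x3E4C under truncation)

This is also why the tensor-ingest paths only skip zeros: truncation is lossy for almost every other value, so writing the truncated value back keeps the software golden model bit-identical with the hardware.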
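For reference, the new mat_elemadd_leakyrelu_exp.gv graph chains PEs to approximate e^v after a leaky ReLU: FP_Mul by 1.44269504089 rescales by log2(e), Fgetfint/Fgetffrac split the product t = v*log2(e) into an integer part n and a fraction f, And 0x00FF keeps 8 fraction bits as an index into the "exp" values array, and Faddiexp adds n to the exponent field of the looked-up value, i.e. e^v = 2^(n+f) = 2^f * 2^n. A Python sketch of that dataflow; the 2^f table contents and the 8-bit fraction width are my reading of the graph, not something the diff spells out:

import math

LOG2_E = 1.44269504089  # the rb_const on node 15

# hypothetical 256-entry table assumed to hold 2**f for the 8-bit fraction f in [0, 1)
exp_table = [2.0 ** (k / 256.0) for k in range(256)]

def exp_approx(v):
    t = v * LOG2_E                  # FP_Mul * 1.44269504089: e**v == 2**t
    n = math.floor(t)               # Fgetfint: integer part of t
    k = int((t - n) * 256) & 0xFF   # Fgetffrac + And 0x00FF: 8 fraction bits
    frac_pow = exp_table[k]         # Array Vals: exp (table lookup)
    return math.ldexp(frac_pow, n)  # Faddiexp: add n to the exponent field

def leakyrelu_exp(v):
    # leaky ReLU (FP_Mul * 0.2 feeding FP_Max), then the exp approximation
    return exp_approx(max(v, 0.2 * v))

print(leakyrelu_exp(1.0), math.e)   # ~2.716 vs 2.71828...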