added support for generating and dumping fp16 matrices
bobcheng15 committed Nov 9, 2023
1 parent 0dafd00 commit 6a0b322
Showing 2 changed files with 50 additions and 30 deletions.
1 change: 0 additions & 1 deletion sam/onyx/fiber_tree.py
@@ -38,7 +38,6 @@ def get_root(self):
return self.root_fiber

def populate_fiber(self, fiber, sub_tensor):

# Last level detection
if len(sub_tensor.shape) == 1:
# Finally have just a row, this is the base case...(could be a scalar)
79 changes: 50 additions & 29 deletions sam/onyx/generate_matrices.py
@@ -11,6 +11,7 @@
import csv
import os
from sam.sim.test.test import *
from lassen.utils import bfbin2float, float2bfbin


class MatrixGenerator:
@@ -44,8 +45,14 @@ def __init__(self, name='B', shape=None, sparsity=0.6, format='CSF', dump_dir=No
self.dump_dir = tempfile.gettempdir()

if tensor is not None:
self.array = tensor
self.shape = self.array.shape
            self.array = tensor
            if tensor.dtype == numpy.float32:
                # quantize each value to bfloat16 precision by round-tripping
                # it through the bfloat16 binary representation
                for idx, x in numpy.ndenumerate(self.array):
                    self.array[idx] = bfbin2float(float2bfbin(x))
            self.shape = self.array.shape
else:
assert shape is not None
self._create_matrix(value_cap=self.value_cap)
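Note: the bfbin2float(float2bfbin(x)) round trip above is what quantizes a float32 value down to bfloat16 precision. A minimal stand-in sketch of the same effect (the quantize_to_bf16 name is hypothetical, and it truncates rather than rounds, matching the comment later in this diff):

import numpy

def quantize_to_bf16(x):
    # View the float32 bit pattern and keep only the top 16 bits
    # (sign, 8 exponent bits, 7 mantissa bits); zero the lower 16.
    bits = numpy.float32(x).view(numpy.uint32)
    return (bits & numpy.uint32(0xFFFF0000)).view(numpy.float32)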
@@ -56,10 +63,17 @@ def _create_matrix(self, value_cap=int(math.pow(2, 8)) - 1):
Routine to create the actual matrix from the dimension/shape
'''
self.array = numpy.random.uniform(low=-1 * value_cap / 2, high=value_cap / 2, size=self.shape)
# convert to float32 for ease of conversion to bfloat16
self.array = self.array.astype(numpy.float32)
        if not self.use_fp:
            self.array = self.array.astype(int)
        else:
            # convert each value to bfloat16 by truncating the trailing
            # fraction bits, then convert it back to a floating-point number
            for idx, x in numpy.ndenumerate(self.array):
                bfval = bfbin2float(float2bfbin(x))
                self.array[idx] = bfval
            assert self.array.dtype == numpy.float32
for idx, x in numpy.ndenumerate(self.array):
if random.random() < self.sparsity:
self.array[idx] = 0
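Taken together, _create_matrix draws uniform values, casts them to float32, quantizes to bfloat16 (or truncates to int when use_fp is off), and finally zeroes each entry with probability sparsity. A hedged end-to-end sketch reusing the stand-in above (make_sparse_bf16 is hypothetical):

import math
import random
import numpy

def make_sparse_bf16(shape, sparsity=0.6, value_cap=int(math.pow(2, 8)) - 1):
    arr = numpy.random.uniform(low=-value_cap / 2, high=value_cap / 2, size=shape)
    arr = arr.astype(numpy.float32)
    for idx, x in numpy.ndenumerate(arr):
        arr[idx] = quantize_to_bf16(x)  # stand-in for bfbin2float(float2bfbin(x))
        if random.random() < sparsity:
            arr[idx] = 0  # expected density is 1 - sparsity
    return arr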
@@ -107,19 +121,19 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
if glb_override:
lines = [len(fake_lines_seg), *fake_lines_seg, len(fake_lines_crd), *fake_lines_crd]
self.write_array(lines, name=f"tensor_{self.name}_mode_{mode}{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
else:
self.write_array(fake_lines_seg, name=f"tensor_{self.name}_mode_{mode}_seg{suffix}",
dump_dir=use_dir, hex=print_hex)
dump_dir=use_dir, dump_hex=print_hex)
self.write_array(fake_lines_crd, name=f"tensor_{self.name}_mode_{mode}_crd{suffix}",
dump_dir=use_dir, hex=print_hex)
dump_dir=use_dir, dump_hex=print_hex)
if glb_override:
lines = [len(fake_lines_val), *fake_lines_val]
self.write_array(fake_lines_val, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
else:
self.write_array(fake_lines_val, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)

return

@@ -129,12 +143,12 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
seg_arr, coord_arr = self._dump_csf(tmp_lvl_list)
if glb_override:
lines = [len(seg_arr), *seg_arr, len(coord_arr), *coord_arr]
self.write_array(lines, name=f"tensor_{self.name}_mode_0{suffix}", dump_dir=use_dir, hex=print_hex)
self.write_array(lines, name=f"tensor_{self.name}_mode_0{suffix}", dump_dir=use_dir, dump_hex=print_hex)
else:
self.write_array(seg_arr, name=f"tensor_{self.name}_mode_0_seg{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
self.write_array(coord_arr, name=f"tensor_{self.name}_mode_0_crd{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)

at_vals = False
i = 1
@@ -157,32 +171,32 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
lines = [len(tmp_lvl_list), *tmp_lvl_list]
# self.write_array(tmp_lvl_list, name=f"tensor_{self.name}_mode_vals" dump_dir=use_dir)
self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex, is_val=True)
else:
self.write_array(tmp_lvl_list, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex, is_val=True)
else:
seg_arr, coord_arr = self._dump_csf(tmp_lvl_list)
if glb_override:
lines = [len(seg_arr), *seg_arr, len(coord_arr), *coord_arr]
self.write_array(lines, name=f"tensor_{self.name}_mode_{i}{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
else:
self.write_array(seg_arr, name=f"tensor_{self.name}_mode_{i}_seg{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
self.write_array(coord_arr, name=f"tensor_{self.name}_mode_{i}_crd{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
i = i + 1
elif self.format == "UNC":
flat_array = []
for val in numpy.nditer(self.array):
flat_array.append(val)
if glb_override:
lines = [len(flat_array), *flat_array]
self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir, hex=print_hex)
self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir, dump_hex=print_hex)
else:
self.write_array(flat_array, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
elif self.format == "COO":
crd_dict = dict()
order = len(self.array.shape)
@@ -200,24 +214,24 @@ def dump_outputs(self, format=None, tpose=False, dump_shape=True,
if glb_override:
lines = [len(crd_dict[key]), *crd_dict[key]]
self.write_array(lines, name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
else:
self.write_array(crd_dict[key], name=f"tensor_{self.name}_mode_vals{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
else:
if glb_override:
lines = [len(crd_dict[key]), *crd_dict[key]]
self.write_array(lines, name=f"tensor_{self.name}_mode_{key}_crd{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)
else:
self.write_array(crd_dict[key],
name=f"tensor_{self.name}_mode_{key}_crd{suffix}",
dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)

if dump_shape:
self.write_array(self.array.shape, name=f"tensor_{self.name}_mode_shape{suffix}", dump_dir=use_dir,
hex=print_hex)
dump_hex=print_hex)

# Transpose it back
if tpose is True:
@@ -246,7 +260,7 @@ def _dump_csf(self, level_list):

return seg_arr, coord_arr
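_dump_csf emits the usual compressed segment/coordinate pair for each level: seg_arr[f] and seg_arr[f + 1] bracket the coordinates that belong to fiber f. A small worked example (illustrative, assuming the standard CSF layout):

# 2x3 matrix with nonzeros a, b, c at (0, 0), (0, 2), (1, 1):
#   mode 0: seg = [0, 2]     crd = [0, 1]     one root fiber holding both row ids
#   mode 1: seg = [0, 2, 3]  crd = [0, 2, 1]  row 0 owns crd[0:2], row 1 owns crd[2:3]
#   vals  = [a, b, c]        ordered to match the mode-1 coordinates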

def write_array(self, str_list, name, dump_dir=None, hex=False):
def write_array(self, str_list, name, dump_dir=None, dump_hex=False, is_val=False):
"""Write an array/list to a file
Args:
@@ -259,11 +273,18 @@ def write_array(self, str_list, name, dump_dir=None, hex=False):
full_path = dump_dir + "/" + name
with open(full_path, "w+") as wr_file:
for item in str_list:
item_int = int(item)
if hex:
wr_file.write(f"{item_int:04X}\n")
data = item
if not is_val:
data = int(item)
if dump_hex:
                    if not isinstance(data, numpy.float32):
                        wr_file.write(f"{data:04X}\n")
                    else:
                        # convert the value to its bfloat16 hexadecimal representation
                        data = hex(int(float2bfbin(data), 2))[2:].zfill(4)
wr_file.write(f"{data}\n")
else:
wr_file.write(f"{item_int}\n")
wr_file.write(f"{data}\n")
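The float32 branch above packs a value's bfloat16 bits into a four-digit hex string. A self-contained illustration with a hypothetical truncating stand-in for lassen's float2bfbin (assumed to return a 16-character binary string):

import numpy

def float2bfbin_stub(x):
    # top 16 bits of the float32 encoding, rendered as a binary string
    return format(int(numpy.float32(x).view(numpy.uint32)) >> 16, '016b')

hex_str = hex(int(float2bfbin_stub(1.5), 2))[2:].zfill(4)
print(hex_str)  # '3fc0' -- the bfloat16 encoding of 1.5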

def get_shape(self):
return self.shape
