From 0673d9ff7d4055e9ffdb6ab8aaffae7be1a731c1 Mon Sep 17 00:00:00 2001
From: Bo Wun Cheng
Date: Thu, 13 Jun 2024 17:38:19 -0700
Subject: [PATCH 1/4] added formatting code for sparse ml

---
 sam/util.py | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/sam/util.py b/sam/util.py
index b6d88edb..a229b62e 100644
--- a/sam/util.py
+++ b/sam/util.py
@@ -11,6 +11,7 @@
 import scipy.io
 import scipy.sparse
 import sparse
+import struct
 
 # All environment variables for SAM should live here or in make file
 cwd = os.getcwd()
@@ -19,6 +20,8 @@
 SUITESPARSE_PATH = os.getenv('SUITESPARSE_PATH', default=os.path.join(SAM_HOME, "data", "suitesparse"))
 SUITESPARSE_FORMATTED_PATH = os.getenv('SUITESPARSE_FORMATTED_PATH', default=os.path.join(SAM_HOME, "data", "suitesparse-formatted"))
+SPARSEML_PATH = os.getenv('SPARSE_ML_PATH', default=os.path.join(SAM_HOME, "data", "sparseml"))
+SPARSEML_PATH_FORMATTED = os.getenv('SPARSE_ML_PATH_FORMATTED', default=os.path.join(SAM_HOME, "data", "sparseml-formatted"))
 FROSTT_PATH = os.getenv('FROSTT_PATH', default=os.path.join(SAM_HOME, "data", "frostt"))
 VALIDATION_OUTPUT_PATH = os.getenv('VALIDATION_OUTPUT_PATH', default=os.path.join(SAM_HOME, "data", "gold"))
@@ -35,6 +38,67 @@ def safeCastScipyTensorToInts(tensor):
         data[i] = round_sparse(tensor.data[i])
     return scipy.sparse.coo_matrix(tensor.coords, data, tensor.shape)
 
+def bfbin2float(bfstr):
+    sign = bfstr[0]
+    exp = bfstr[1:9]
+    lfrac = bfstr[9:16]
+    if sign == "0" and exp == "11111111" and lfrac != "0000000":
+        return float('nan')
+    elif sign == "1" and exp == "11111111" and lfrac != "0000000":
+        return -float('nan')
+    elif sign == "0" and exp == "11111111" and lfrac == "0000000":
+        return float('inf')
+    elif sign == "1" and exp == "11111111" and lfrac == "0000000":
+        return -float('inf')
+    elif sign == "0" and exp == "00000000" and lfrac == "0000000":
+        return float(0)
+    elif sign == "1" and exp == "00000000" and lfrac == "0000000":
+        return -float(0)
+    else:
+        mult = 1
+        if sign == "1":
+            mult = -1
+        nexp = int(exp, 2) - 127
+        if exp != "00000000":  # only normalized values get the implicit leading 1
+            lfrac = "1" + lfrac
+        else:
+            lfrac = "0" + lfrac
+        nfrac = int(lfrac, 2)
+        return mult * nfrac * (2 ** (nexp - 7))
+
+
+def float2bfbin(fnum):
+    if fnum == "NaN":
+        sign = "0"
+        exp = "11111111"
+        lfrac = "11111111"
+    elif fnum == "-NaN":
+        sign = "1"
+        exp = "11111111"
+        lfrac = "11111111"
+    elif fnum == "Inf" or fnum > 3.402823466e+38:
+        sign = "0"
+        exp = "11111111"
+        lfrac = "00000000"
+    elif fnum == "-Inf" or fnum < -3.402823466e+38:
+        sign = "1"
+        exp = "11111111"
+        lfrac = "00000000"
+    else:
+        fstr = "".join("{:08b}".format(elem) for elem in struct.pack("!f", fnum))
+        sign = fstr[0]
+        exp = fstr[1:9]
+        lfrac = "0" + fstr[9:16]
+        hfrac = fstr[16:]
+        # Enable rounding
+        if (hfrac[0] == "1" and (hfrac[1] == "1" or hfrac[2] == "1")) or (lfrac[7] == "1" and hfrac[0] == "1"):
+            # bit 8 of the float mantissa is set, so round up
+            if lfrac[1:8] == "1111111":  # roll over mantissa and increase exp if needed
+                exp = "{:08b}".format((int(exp, 2) + 1))  # exp overflow?
+            lfrac = "{:08b}".format((int(lfrac, 2) + 1))
+
+    return sign + exp + lfrac[1:8]
+
 # ScipyTensorShifter shifts all elements in the last mode
 # of the input scipy/sparse tensor by one.
@@ -242,6 +306,20 @@ def load(self, path):
 
         return coo
 
+class NumpyNPYArrayLoader:
+    def __init__(self):
+        pass
+
+    def load(self, path):
+        np_array = numpy.load(path)
+        if (np_array.dtype == numpy.dtype('S16')):
+            input_fp_array = numpy.empty_like(np_array, dtype=numpy.float32)
+            for idx, val in numpy.ndenumerate(np_array):
+                input_fp_array[idx] = bfbin2float(str(val).split("'")[1])
+        coo = scipy.sparse.coo_array(input_fp_array)
+        return coo
+
+
 def shape_str(shape):
     return str(shape[0]) + " " + str(shape[1])
@@ -282,6 +360,23 @@ def load(self, tensor, cast):
             self.tensor = self.lastLoaded
         return self.tensor
 
+class InputCacheSparseML:
+    def __init__(self):
+        self.lastLoaded = None
+        self.lastName = None
+        self.tensor = None
+
+    def load(self, tensor, cast):
+        if self.lastName == str(tensor):
+            return self.tensor
+        else:
+            self.lastLoaded = tensor.load(NumpyNPYArrayLoader())
+            self.lastName = str(tensor)
+            if cast:
+                self.tensor = self.lastLoaded
+            else:
+                self.tensor = self.lastLoaded
+            return self.tensor
 
 class FormatWriter:
     def __init__(self, cast_int=True):
         self.cast = cast_int
@@ -598,6 +693,18 @@ def __str__(self):
 
     def load(self, loader):
         return loader.load(self.path)
+
+class SparseMLTensor:
+    def __init__(self, path):
+        self.path = path
+        self.__name__ = self.__str__()
+
+    def __str__(self):
+        f = os.path.split(self.path)[1]
+        return f.replace(".npy", "")
+
+    def load(self, loader):
+        return loader.load(self.path)
 
 
 # TensorCollectionSuiteSparse represents the set of all downloaded
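A quick round-trip check of the bfloat16 helpers introduced above (a minimal sketch, not part of the patch; it assumes sam/util.py is importable with PATCH 1/4 applied):

    from sam.util import bfbin2float, float2bfbin

    # bfloat16 layout: 1 sign bit, 8 exponent bits, 7 mantissa bits
    bits = float2bfbin(3.14159)   # -> "0100000001001001"
    val = bfbin2float(bits)       # -> 3.140625 (pi truncated to 7 mantissa bits)
    assert len(bits) == 16
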
From 1b52bcf3a7458f1db448edc19a56c8fa41006e6c Mon Sep 17 00:00:00 2001
From: Bo Wun Cheng
Date: Thu, 13 Jun 2024 17:47:29 -0700
Subject: [PATCH 2/4] fix code style

---
 sam/util.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sam/util.py b/sam/util.py
index a229b62e..d2f41cf1 100644
--- a/sam/util.py
+++ b/sam/util.py
@@ -38,6 +38,7 @@ def safeCastScipyTensorToInts(tensor):
         data[i] = round_sparse(tensor.data[i])
     return scipy.sparse.coo_matrix(tensor.coords, data, tensor.shape)
 
+
 def bfbin2float(bfstr):
     sign = bfstr[0]
     exp = bfstr[1:9]
@@ -91,7 +92,7 @@ def float2bfbin(fnum):
         lfrac = "0" + fstr[9:16]
         hfrac = fstr[16:]
         # Enable rounding
-        if (hfrac[0] == "1" and (hfrac[1] == "1" or hfrac[2] == "1")) or (lfrac[7] == "1" and hfrac[0] == "1"):
+        if (hfrac[0] == "1" and (hfrac[1] == "1" or hfrac[2] == "1")) or (lfrac[7] == "1" and hfrac[0] == "1"):
             # bit 8 of the float mantissa is set, so round up
             if lfrac[1:8] == "1111111":  # roll over mantissa and increase exp if needed
                 exp = "{:08b}".format((int(exp, 2) + 1))  # exp overflow?
@@ -360,6 +361,7 @@ def load(self, tensor, cast):
             self.tensor = self.lastLoaded
         return self.tensor
 
+
 class InputCacheSparseML:
     def __init__(self):
         self.lastLoaded = None
@@ -378,6 +380,7 @@ def load(self, tensor, cast):
             self.tensor = self.lastLoaded
         return self.tensor
 
+
 class FormatWriter:
     def __init__(self, cast_int=True):
         self.cast = cast_int
@@ -693,7 +696,8 @@ def __str__(self):
 
     def load(self, loader):
         return loader.load(self.path)
-
+
+
 class SparseMLTensor:
     def __init__(self, path):
         self.path = path
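To exercise the SparseML loading path end to end, one can encode a matrix as the dtype='S16' array of 16-character bfloat16 bitstrings that NumpyNPYArrayLoader expects. A sketch of such a driver (the file path and matrix values here are illustrative, not taken from the patches):

    import numpy
    from sam.util import NumpyNPYArrayLoader, SparseMLTensor, float2bfbin

    # Encode a small dense matrix as bfloat16 bitstrings and save it as .npy.
    dense = numpy.array([[1.5, 0.0], [0.0, -2.25]], dtype=numpy.float32)
    encoded = numpy.array([[float2bfbin(float(v)) for v in row] for row in dense],
                          dtype='S16')
    numpy.save("/tmp/example_tensor.npy", encoded)

    tensor = SparseMLTensor("/tmp/example_tensor.npy")
    coo = tensor.load(NumpyNPYArrayLoader())  # scipy.sparse.coo_array of float32
    print(str(tensor))    # "example_tensor" (SparseMLTensor strips the ".npy")
    print(coo.toarray())  # [[ 1.5   0.  ]
                          #  [ 0.   -2.25]]
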
From d830b6f93e402f529004530917b0f734de0bb9d8 Mon Sep 17 00:00:00 2001
From: Bo Wun Cheng
Date: Sat, 15 Jun 2024 11:11:39 -0700
Subject: [PATCH 3/4] added todo msg for bfbin2float and float2bfbin functions

---
 sam/util.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sam/util.py b/sam/util.py
index d2f41cf1..fb546445 100644
--- a/sam/util.py
+++ b/sam/util.py
@@ -38,7 +38,8 @@ def safeCastScipyTensorToInts(tensor):
         data[i] = round_sparse(tensor.data[i])
     return scipy.sparse.coo_matrix(tensor.coords, data, tensor.shape)
 
-
+# TODO: this function is duplicated multiple times across the aha repository
+# and should be moved to a common location
 def bfbin2float(bfstr):
     sign = bfstr[0]
     exp = bfstr[1:9]
@@ -67,7 +68,8 @@ def bfbin2float(bfstr):
     nfrac = int(lfrac, 2)
     return mult * nfrac * (2 ** (nexp - 7))
 
-
+# TODO: this function is duplicated multiple times across the aha repository
+# and should be moved to a common location
 def float2bfbin(fnum):
     if fnum == "NaN":
         sign = "0"
         exp = "11111111"
         lfrac = "11111111"

From a2da9893a83c51a9576c6de40ed993e932a428b4 Mon Sep 17 00:00:00 2001
From: Bo Wun Cheng
Date: Sat, 15 Jun 2024 11:25:53 -0700
Subject: [PATCH 4/4] fix code style

---
 sam/util.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sam/util.py b/sam/util.py
index fb546445..f349d996 100644
--- a/sam/util.py
+++ b/sam/util.py
@@ -38,6 +38,7 @@ def safeCastScipyTensorToInts(tensor):
         data[i] = round_sparse(tensor.data[i])
     return scipy.sparse.coo_matrix(tensor.coords, data, tensor.shape)
 
+
 # TODO: this function is duplicated multiple times across the aha repository
 # and should be moved to a common location
 def bfbin2float(bfstr):
@@ -68,6 +69,7 @@ def bfbin2float(bfstr):
     nfrac = int(lfrac, 2)
     return mult * nfrac * (2 ** (nexp - 7))
 
+
 # TODO: this function is duplicated multiple times across the aha repository
 # and should be moved to a common location
 def float2bfbin(fnum):
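InputCacheSparseML memoizes the most recently loaded tensor by name, mirroring the existing InputCacheSuiteSparse. A sketch of the intended usage (hypothetical; it reuses the illustrative /tmp file from the previous example):

    from sam.util import InputCacheSparseML, SparseMLTensor

    cache = InputCacheSparseML()
    tensor = SparseMLTensor("/tmp/example_tensor.npy")
    coo_first = cache.load(tensor, cast=False)   # reads the .npy from disk
    coo_again = cache.load(tensor, cast=False)   # name matches: cached COO returned
    assert coo_first is coo_again

Note that, as written in the patches, the cast flag does not change the result: both branches assign self.lastLoaded, since the bfloat16 data is already floating point.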