diff --git a/heterocl/ast/ir_builder.py b/heterocl/ast/ir_builder.py index 2abbca30..fae0d970 100644 --- a/heterocl/ast/ir_builder.py +++ b/heterocl/ast/ir_builder.py @@ -7,8 +7,6 @@ # Import MLIR dialects # Naming rule: import dialect as dialect_d -import numpy as np - from hcl_mlir.dialects import ( func as func_d, hcl as hcl_d, @@ -52,7 +50,7 @@ from . import ast from ..context import get_context, get_location -from ..utils import hcl_dtype_to_mlir, get_extra_type_hints +from ..utils import hcl_dtype_to_mlir, get_extra_type_hints, make_anywidth_numpy_array from .. import types as htypes from . import build_cleaner @@ -1408,63 +1406,9 @@ def build_bit_reverse_op(self, op: ast.BitReverseOp, ip): def build_constant_tensor_op(self, op: ast.ConstantTensorOp, ip): loc = Location.file(op.loc.filename, op.loc.lineno, 0) dtype = hcl_dtype_to_mlir(op.dtype, signless=True) - shape = op.values.shape if isinstance(op.dtype, (htypes.Int, htypes.UInt)): - # The following code has several steps to convert the numpy array to have - # the correct data type in order to create an MLIR constant tensor. - # Since MLIR-NumPy Python interface only supports byte-addressable data types, - # we need to change the data type of the array to have the minimum number of bytes - # that can represent the target bitwidth. - # e.g., hcl.const_tensor(arr, dtype=hcl.Int(20)) (6*6 array) - # which requires 20 bits (3 bytes) to represent each element - # declaration: 6*6*i20 - # numpy input: 6*6*i64 - # 1. Decompose the original i32 or i64 array into a structured array of uint8 - # -> decompose: 6*6*8*i8 - if op.dtype.bits == 1: - val = op.values - array = np.packbits(val, axis=None, bitorder="little") - value_attr = DenseElementsAttr.get(array, shape=val.shape, type=dtype) - else: - # Here we construct a customized NumPy dtype, "f0", "f1", "f2", etc. - # are the field names, and the entire data type is `op.values.dtype`. - # This can be viewed as a `union` type in C/C++. 
- # Please refer to the documentation for more details: - # https://numpy.org/doc/stable/reference/arrays.dtypes.html#specifying-and-constructing-data-types - decomposed_np_dtype = np.dtype( - ( - op.values.dtype, - { - f"f{i}": (np.uint8, i) - for i in range(op.values.dtype.itemsize) - }, - ) - ) - val = op.values.view(decomposed_np_dtype) - # 2. Compose the uint8 array into a structured array of target bitwidth - # This is done by taking the first several bytes of the uint8 array - # "u1" means one unsigned byte, and "i1" means one signed byte - n_bytes = int(np.ceil(dtype.width / 8)) - new_dtype = np.dtype( - { - "names": [f"f{i}" for i in range(n_bytes)], - "formats": (["i1"] if isinstance(dtype, htypes.Int) else ["u1"]) - + ["u1"] * (n_bytes - 1), - "offsets": list(range(n_bytes)), - "itemize": n_bytes, - } - ) - # -> compose: 6*6*3*i8 - val = np.stack([val[f"f{i}"] for i in range(n_bytes)], axis=-1) - # -> flatten: 108*i8 - val = val.flatten() - # -> view: 36*i24 - val = val.view(np.dtype(new_dtype)) - # -> reshape: 6*6*i24 - val = val.reshape(shape) - # Pass in the numpy array to get the MLIR attribute - # -> result: 6*6*i20 - value_attr = DenseElementsAttr.get(val, shape=val.shape, type=dtype) + val = make_anywidth_numpy_array(op.values, op.dtype.bits) + value_attr = DenseElementsAttr.get(val, shape=op.values.shape, type=dtype) else: val = op.values value_attr = DenseElementsAttr.get(val) diff --git a/heterocl/build_module.py b/heterocl/build_module.py index 8a66bd68..91124927 100644 --- a/heterocl/build_module.py +++ b/heterocl/build_module.py @@ -337,13 +337,12 @@ def attach_llvm_attrs(module): hcl_d.lower_composite_type(module) hcl_d.lower_fixed_to_int(module) hcl_d.lower_print_ops(module) - hcl_d.lower_anywidth_int(module) + # hcl_d.lower_anywidth_int(module) # Note: lower_any_width_int should precede # move_return_to_input, because it uses input/output # type hints. 
def execute_llvm_backend(execution_engine, name, *argv):
    """
    Invoke the JIT-compiled top-level function on the given arrays.

    Every positional argument is unwrapped, described as a ranked memref,
    and handed to the execution engine in the order given. No result is
    copied back here.

    Parameters
    ----------
    execution_engine : mlir.ExecutionEngine
        JIT object, created in hcl.build
    name : str
        device top-level function name
    argv : list-like object
        input and output variables; each must provide ``unwrap()``
        (presumably hcl Arrays whose results have already been moved to
        the argument list -- confirm against the caller)
    """
    # Hold on to the unwrapped arrays for the whole call so the buffers the
    # descriptors point at stay referenced while the engine runs.
    unwrapped = [wrapper.unwrap() for wrapper in argv]
    # The engine is handed a pointer-to-pointer for each memref descriptor.
    memref_pointers = [
        ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(array)))
        for array in unwrapped
    ]
    # Invoke device top-level function
    execution_engine.invoke(name, *memref_pointers)
def __init__(self, array, dtype):
    """
    Wrap ``array`` as a HeteroCL array of type ``dtype``.

    Parameters
    ----------
    array : numpy.ndarray or a python list
        The data to wrap.
    dtype : HeteroCL data type
        Must not be None.

    Raises
    ------
    APIError
        If ``dtype`` is None.
    """
    self.dtype = dtype
    if dtype is None:
        raise APIError("Should provide type info")

    # First bring every value into the range representable by ``dtype``.
    data = self._handle_overflow(array, dtype)

    if isinstance(dtype, (Int, UInt)):
        # Only Int/UInt data is repacked into a byte-structured array here;
        # every other type keeps whatever _handle_overflow returned.
        # The container width is the next power of two >= dtype.bits, with a
        # floor of 8 bits:
        #   1..8 -> 8, 9..16 -> 16, 17..32 -> 32, 33..64 -> 64, 65..128 -> 128
        container_bits = max(8, 1 << (dtype.bits - 1).bit_length())
        data = make_anywidth_numpy_array(data, container_bits)

    # self.np_array is what unwrap() hands out.
    self.np_array = data
def _handle_overflow(self, array, dtype):
    """
    Wrap the values of ``array`` into the range representable by ``dtype``.

    Integer and fixed-point values overflow in wrap-around mode: they are
    reduced modulo ``2**dtype.bits`` and, for signed types, mapped into
    ``[-2**(bits-1), 2**(bits-1))``. Fixed-point values are first scaled by
    ``2**dtype.fracs`` and truncated towards zero. Float data is only cast
    to the matching numpy dtype.

    Parameters
    ----------
    array : numpy.ndarray or (nested) python list
        Input values. A python list is processed element by element with
        python integers, so it is not limited to 64 bits.
    dtype : HeteroCL data type
        Float, Int, UInt, Fixed or UFixed.

    Returns
    -------
    numpy.ndarray or list
        For Float: an ndarray of the matching float dtype.
        For list input of the other types: a list of the same nesting
        holding python ints.
        For ndarray input of the other types: an ndarray of np.int64, or
        np.uint64 for unsigned types that are 64 bits or wider.

    Raises
    ------
    DTypeError
        If ``dtype`` is not one of the supported types.
    """
    if isinstance(dtype, Float):
        if isinstance(array, list):
            array = np.array(array)
        correct_dtype = np.dtype(dtype_to_str(dtype))
        if array.dtype != correct_dtype:
            array = array.astype(correct_dtype)
        return array

    # Keep the original check order so the first matching type wins.
    if isinstance(dtype, Int):
        signed, fractional = True, False
    elif isinstance(dtype, UInt):
        signed, fractional = False, False
    elif isinstance(dtype, Fixed):
        signed, fractional = True, True
    elif isinstance(dtype, UFixed):
        signed, fractional = False, True
    else:
        raise DTypeError("Type error: unrecognized type: " + str(dtype))

    modulus = 1 << dtype.bits
    sign_limit = modulus >> 1
    scale = 2**dtype.fracs if fractional else None

    def wrap(x):
        # Recurse through nested python lists.
        if isinstance(x, list):
            return [wrap(y) for y in x]
        if scale is not None:
            # Fixed-point: scale to the integer representation and round
            # towards zero.
            x = math.trunc(float(x) * scale)
        # Wrap mode: cap the value to the bitwidth ...
        x = x % modulus
        # ... and fold the upper half onto negative values for signed types.
        if signed and x >= sign_limit:
            return x - modulus
        return x

    if isinstance(array, list):
        # Lists are handled purely with python ints, for every type. The
        # previous code only did this for Int and crashed on lists for
        # UInt (list % int) and Fixed/UFixed (list.astype).
        return wrap(array)

    # otypes=[object] keeps exact python ints until the final cast and also
    # lets np.vectorize accept empty arrays.
    wrapped = np.vectorize(wrap, otypes=[object])(array)
    if not signed and dtype.bits >= 64:
        # Unsigned values may use the full 64 bits; values that still do
        # not fit must be supplied as a python list instead of an ndarray.
        return wrapped.astype(np.uint64)
    return wrapped.astype(np.int64)
def make_anywidth_numpy_array(val, bitwidth):
    """
    Repack integer data into a byte-structured array of a target bitwidth.

    The MLIR-NumPy interface only handles byte-addressable element types,
    so every element is re-expressed as ``ceil(bitwidth / 8)`` unsigned
    bytes, least-significant byte first (fields ``f0``, ``f1``, ...).
    For example a 6*6 int64 array with ``bitwidth=20`` becomes a 6*6 array
    whose elements are 3-byte records.

    Parameters
    ----------
    val : numpy.ndarray or (nested) python list
        Source values. A native-dtype ndarray is reinterpreted byte-wise,
        which assumes the host stores integers least-significant byte
        first. A python list or object-dtype ndarray is converted element
        by element from python ints, so values wider than 64 bits are
        supported.
    bitwidth : int
        Target bitwidth, e.g. 9, 31, 198. Must be >= 1.

    Returns
    -------
    numpy.ndarray
        For ``bitwidth == 1``: the flat bit-packed uint8 array produced by
        ``np.packbits`` (little bit order).
        Otherwise: a structured array with the shape of ``val`` whose
        elements hold ``ceil(bitwidth / 8)`` bytes. Elements narrower than
        that are sign-extended (0x00 for non-negative, 0xFF for negative
        values); wider elements keep only their low bytes.

    Raises
    ------
    ValueError
        If ``bitwidth`` is smaller than 1.
    """
    if bitwidth < 1:
        raise ValueError(f"bitwidth must be a positive integer, got {bitwidth}")
    if bitwidth == 1:
        return np.packbits(np.asarray(val), axis=None, bitorder="little")

    n_bytes = (bitwidth + 7) // 8
    # "u1" is one unsigned byte; f0 is the LSB, f{n_bytes-1} the MSB.
    new_dtype = np.dtype(
        {
            "names": [f"f{i}" for i in range(n_bytes)],
            "formats": ["u1"] * n_bytes,
            "offsets": list(range(n_bytes)),
            "itemsize": n_bytes,
        }
    )

    if not isinstance(val, np.ndarray):
        # dtype=object keeps python ints exact instead of letting numpy
        # pick a lossy common type for values that do not fit 64 bits.
        val = np.array(val, dtype=object)

    if val.dtype == object:
        # Arbitrary-precision path: masking yields the two's complement
        # pattern of negative values within n_bytes bytes.
        mask = (1 << (8 * n_bytes)) - 1
        raw = b"".join(
            (int(x) & mask).to_bytes(n_bytes, byteorder="little")
            for x in val.ravel()
        )
        # frombuffer gives a read-only view of the bytes; copy so that the
        # result is writable like the one produced by the native path.
        return np.frombuffer(raw, dtype=new_dtype).reshape(val.shape).copy()

    shape = val.shape
    non_negative = val >= 0
    avail_bytes = val.dtype.itemsize  # number of bytes of each element
    # 1. Decompose each element into its bytes. The customized dtype
    # overlays uint8 fields "f0", "f1", ... on the original element type,
    # much like a `union` in C/C++. See
    # https://numpy.org/doc/stable/reference/arrays.dtypes.html#specifying-and-constructing-data-types
    byte_view = val.view(
        np.dtype(
            (
                val.dtype,
                {f"f{i}": (np.uint8, i) for i in range(avail_bytes)},
            )
        )
    )
    # 2. Keep the low n_bytes bytes of every element ...
    planes = [byte_view[f"f{i}"] for i in range(min(avail_bytes, n_bytes))]
    if avail_bytes < n_bytes:
        # ... and sign-extend when the source has fewer bytes than needed.
        padding = np.where(non_negative, 0x00, 0xFF).astype(np.uint8)
        planes += [padding] * (n_bytes - avail_bytes)
    # compose (shape + (n_bytes,)) -> flatten -> view as records -> reshape
    packed = np.stack(planes, axis=-1).flatten().view(new_dtype)
    return packed.reshape(shape)
test_dtypes = [hcl.Int(2), hcl.Int(20), hcl.Int(63), hcl.Int(255), hcl.Int(512)] + for dtype in test_dtypes: + test_int(dtype) + + if __name__ == "__main__": pytest.main([__file__])