diff --git a/heterocl/ast/ir_builder.py b/heterocl/ast/ir_builder.py
index 2abbca30..fae0d970 100644
--- a/heterocl/ast/ir_builder.py
+++ b/heterocl/ast/ir_builder.py
@@ -7,8 +7,6 @@
 
 # Import MLIR dialects
 # Naming rule: import dialect as dialect_d
-import numpy as np
-
 from hcl_mlir.dialects import (
     func as func_d,
     hcl as hcl_d,
@@ -52,7 +50,7 @@
 
 from . import ast
 from ..context import get_context, get_location
-from ..utils import hcl_dtype_to_mlir, get_extra_type_hints
+from ..utils import hcl_dtype_to_mlir, get_extra_type_hints, make_anywidth_numpy_array
 from .. import types as htypes
 from . import build_cleaner
 
@@ -1408,63 +1406,9 @@ def build_bit_reverse_op(self, op: ast.BitReverseOp, ip):
     def build_constant_tensor_op(self, op: ast.ConstantTensorOp, ip):
         loc = Location.file(op.loc.filename, op.loc.lineno, 0)
         dtype = hcl_dtype_to_mlir(op.dtype, signless=True)
-        shape = op.values.shape
         if isinstance(op.dtype, (htypes.Int, htypes.UInt)):
-            # The following code has several steps to convert the numpy array to have
-            # the correct data type in order to create an MLIR constant tensor.
-            # Since MLIR-NumPy Python interface only supports byte-addressable data types,
-            # we need to change the data type of the array to have the minimum number of bytes
-            # that can represent the target bitwidth.
-            # e.g., hcl.const_tensor(arr, dtype=hcl.Int(20)) (6*6 array)
-            #       which requires 20 bits (3 bytes) to represent each element
-            # declaration: 6*6*i20
-            # numpy input: 6*6*i64
-            # 1. Decompose the original i32 or i64 array into a structured array of uint8
-            #  -> decompose: 6*6*8*i8
-            if op.dtype.bits == 1:
-                val = op.values
-                array = np.packbits(val, axis=None, bitorder="little")
-                value_attr = DenseElementsAttr.get(array, shape=val.shape, type=dtype)
-            else:
-                # Here we construct a customized NumPy dtype, "f0", "f1", "f2", etc.
-                # are the field names, and the entire data type is `op.values.dtype`.
-                # This can be viewed as a `union` type in C/C++.
-                # Please refer to the documentation for more details:
-                # https://numpy.org/doc/stable/reference/arrays.dtypes.html#specifying-and-constructing-data-types
-                decomposed_np_dtype = np.dtype(
-                    (
-                        op.values.dtype,
-                        {
-                            f"f{i}": (np.uint8, i)
-                            for i in range(op.values.dtype.itemsize)
-                        },
-                    )
-                )
-                val = op.values.view(decomposed_np_dtype)
-                # 2. Compose the uint8 array into a structured array of target bitwidth
-                # This is done by taking the first several bytes of the uint8 array
-                # "u1" means one unsigned byte, and "i1" means one signed byte
-                n_bytes = int(np.ceil(dtype.width / 8))
-                new_dtype = np.dtype(
-                    {
-                        "names": [f"f{i}" for i in range(n_bytes)],
-                        "formats": (["i1"] if isinstance(dtype, htypes.Int) else ["u1"])
-                        + ["u1"] * (n_bytes - 1),
-                        "offsets": list(range(n_bytes)),
-                        "itemize": n_bytes,
-                    }
-                )
-                # -> compose: 6*6*3*i8
-                val = np.stack([val[f"f{i}"] for i in range(n_bytes)], axis=-1)
-                # -> flatten: 108*i8
-                val = val.flatten()
-                # -> view: 36*i24
-                val = val.view(np.dtype(new_dtype))
-                # -> reshape: 6*6*i24
-                val = val.reshape(shape)
-                # Pass in the numpy array to get the MLIR attribute
-                # -> result: 6*6*i20
-                value_attr = DenseElementsAttr.get(val, shape=val.shape, type=dtype)
+            val = make_anywidth_numpy_array(op.values, op.dtype.bits)
+            value_attr = DenseElementsAttr.get(val, shape=op.values.shape, type=dtype)
         else:
             val = op.values
             value_attr = DenseElementsAttr.get(val)
diff --git a/heterocl/build_module.py b/heterocl/build_module.py
index 8a66bd68..91124927 100644
--- a/heterocl/build_module.py
+++ b/heterocl/build_module.py
@@ -337,13 +337,12 @@ def attach_llvm_attrs(module):
         hcl_d.lower_composite_type(module)
         hcl_d.lower_fixed_to_int(module)
         hcl_d.lower_print_ops(module)
-        hcl_d.lower_anywidth_int(module)
+        # hcl_d.lower_anywidth_int(module)  # FIXME: disabled; note below is stale
         # Note: lower_any_width_int should precede
         # move_return_to_input, because it uses input/output
         # type hints.
         hcl_d.move_return_to_input(module)
         hcl_d.lower_bit_ops(module)
-        # print(module)
         hcl_d.legalize_cast(module)
         hcl_d.remove_stride_map(module)
         pipeline = "lower-affine,func.func(buffer-loop-hoisting)"
diff --git a/heterocl/module.py b/heterocl/module.py
index 0dee554a..17bb2093 100644
--- a/heterocl/module.py
+++ b/heterocl/module.py
@@ -110,7 +110,7 @@ def __call__(self, *argv):
                                 argv[len(op.arguments) + i].np_array = np.pad(
                                     argv[len(op.arguments) + i].np_array, pad_shape
                                 )
-            execute_llvm_backend(self.src, self.name, self.return_num, *argv)
+            execute_llvm_backend(self.src, self.name, *argv)
             for res, shape in original_results:
                 slicing = []
                 for s in shape:
diff --git a/heterocl/runtime.py b/heterocl/runtime.py
index f4ef694e..230f4afe 100644
--- a/heterocl/runtime.py
+++ b/heterocl/runtime.py
@@ -7,11 +7,20 @@
 import subprocess
 import ctypes
 import time
-import numpy as np
+import warnings
 
 from hcl_mlir import runtime as rt
 from .report import parse_xml
 
+# Filter out the warning from numpy when using ctypes array as numpy array.
+# This is a Python bug; the filter is process-wide and `message` is a regex. See:
+# https://stackoverflow.com/questions/4964101/pep-3118-warning-when-using-ctypes-array-as-numpy-array
+warnings.filterwarnings(
+    "ignore",
+    category=RuntimeWarning,
+    message="A builtin ctypes object gave a PEP3118 format string that does not match its itemsize*",
+)
+
 
 def run_process(cmd, pattern=None):
     p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
@@ -111,32 +120,26 @@ def execute_fpga_backend(target, shell=True):
         raise RuntimeError("Not implemented")
 
 
-def execute_llvm_backend(execution_engine, name, return_num, *argv):
+def execute_llvm_backend(execution_engine, name, *argv):
     """
-    - execution_engine: mlir.ExecutionEngine object, created in hcl.build
-    - name: str, device top-level function name
-    - return_num: int, the number of return values
-    - argv: list-like object, a list of input and output variables
+    Execute LLVM backend; return args are assumed already moved to input args.
+    Parameters
+    ----------
+    execution_engine: mlir.ExecutionEngine
+        JIT object, created in hcl.build
+    name: str
+        device top-level function name
+    argv: hcl Array objects (each is unwrapped to a numpy array)
+        input and output variables, passed to the callee in this order
     """
     if not isinstance(argv, list):
         argv = list(argv)
+
     # Unwrap hcl Array to get numpy arrays
     argv_np = [arg.unwrap() for arg in argv]
-    # Extract output arrays
-    return_args = argv_np[-return_num:]
-    # Convert output variables from numpy arrays to memref pointers
-    return_pointers = []
-    for arg in return_args:
-        memref = rt.get_ranked_memref_descriptor(arg)
-        return_pointers.append(ctypes.pointer(ctypes.pointer(memref)))
-    # Convert input variables from numpy arrays to memref pointers
     arg_pointers = []
-    for arg in argv_np[0:-return_num]:
+    for arg in argv_np:
         memref = rt.get_ranked_memref_descriptor(arg)
         arg_pointers.append(ctypes.pointer(ctypes.pointer(memref)))
     # Invoke device top-level function
-    execution_engine.invoke(name, *return_pointers, *arg_pointers)
-    # Copy output arrays back
-    for i, return_p in enumerate(return_pointers):
-        out_array = rt.ranked_memref_to_numpy(return_p[0])
-        np.copyto(argv[-(len(return_args) - i)].np_array, out_array)
+    execution_engine.invoke(name, *arg_pointers)
diff --git a/heterocl/tensor.py b/heterocl/tensor.py
index e161ea43..3d8e6226 100644
--- a/heterocl/tensor.py
+++ b/heterocl/tensor.py
@@ -1,87 +1,207 @@
 # Copyright HeteroCL authors. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
 
+import math
 import numpy as np
-from hcl_mlir.exceptions import DTypeError
+from hcl_mlir.exceptions import DTypeError, APIError, DTypeWarning
 
 from .types import dtype_to_str, Int, UInt, Float, Fixed, UFixed
+from .utils import make_anywidth_numpy_array
 
 
 class Array:
-    """A wrapper class for numpy array
-    Differences between array and tensor:
-    tensor is only a placeholder while array holds actual values
+    """
+    Represents an input tensor in HeteroCL.
+    This class is a wrapper around numpy.ndarray, but it also
+    supports a wider range of data types, including any-width
+    integer and fixed-point data types.
     """
 
-    def __init__(self, np_array, dtype):
-        self.dtype = dtype  # should specify the type of `dtype`
-        if isinstance(np_array, list):
-            np_array = np.array(np_array)
-        if dtype is not None:
-            # Data type check
-            if isinstance(dtype, Float):
-                hcl_dtype_str = dtype_to_str(dtype)
-                correct_dtype = np.dtype(hcl_dtype_str)
-                if np_array.dtype != correct_dtype:
-                    np_array = np_array.astype(correct_dtype)
-            elif isinstance(dtype, Int):
-                # Handle overflow
-                sb = 1 << self.dtype.bits
-                sb_limit = 1 << (self.dtype.bits - 1)
-                np_array = np_array % sb
-
-                def cast_func(x):
-                    return x if x < sb_limit else x - sb
-
-                vec_np_array = np.vectorize(cast_func)(np_array)
-                np_array = vec_np_array.astype(np.uint64)
-            elif isinstance(dtype, UInt):
-                # Handle overflow
-                sb = 1 << self.dtype.bits
-                np_array = np_array % sb
-                np_array = np_array.astype(np.uint64)
-            elif isinstance(dtype, Fixed):
-                # Handle overflow
-                sb = 1 << self.dtype.bits
-                sb_limit = 1 << (self.dtype.bits - 1)
-                np_array = np_array * (2**dtype.fracs)
-                np_array = np.fix(np_array) % sb
-
-                def cast_func(x):
-                    return x if x < sb_limit else x - sb
-
-                vec_np_array = np.vectorize(cast_func)(np_array)
-                np_array = vec_np_array.astype(np.uint64)
-            elif isinstance(dtype, UFixed):
-                # Handle overflow
-                sb = 1 << self.dtype.bits
-                np_array = np_array * (2**dtype.fracs)
-                np_array = np.fix(np_array) % sb
-                np_array = np_array.astype(np.uint64)
-            else:
-                raise DTypeError("Type error: unrecognized type: " + str(self.dtype))
-        else:
-            raise RuntimeError("Should provide type info")
-        self.np_array = np_array
+    def __init__(self, array, dtype):
+        """
+        Parameters
+        ----------
+        array : numpy.ndarray or a python list
+            The array to be wrapped.
+            Lists are intended for types wider than 64 bits; NOTE(review): the
+            Int/UInt packing step below reads `.shape`, confirm list input works.
+        dtype : HeteroCL data type
+        """
+        self.dtype = dtype
+        if dtype is None:
+            raise APIError("Should provide type info")
+        # self.np_array: a numpy array that holds the data
+        # For float type, self.np_array is a float type numpy array
+        # For int, uint, self.np_array is a struct type numpy array with each field
+        # being a byte; fixed, ufixed keep the int64 array from _handle_overflow.
+        self.np_array = self._handle_overflow(array, dtype)
+        if isinstance(dtype, (Int, UInt)):
+            # round the bitwidth up to a power of 2 (at least 8 bits)
+            bitwidth = 1 << (self.dtype.bits - 1).bit_length()
+            bitwidth = max(bitwidth, 8)
+            # this is to be compliant with MLIR's anywidth int type alignment
+            # e.g. i1-i8 -> int8
+            #      i9-i16 -> int16
+            #      i17-i32 -> int32
+            #      i33-i64 -> int64
+            #      i65-i128 -> int128
+            #      i129-i256 -> int256
+            self.np_array = make_anywidth_numpy_array(self.np_array, bitwidth)
 
     def asnumpy(self):
-        if isinstance(self.dtype, (Fixed, UFixed)):
-            if isinstance(self.dtype, Fixed):
-                res_array = self.np_array.astype(np.int64)
-            else:
-                res_array = self.np_array
-            res_array = res_array.astype(np.float64) / float(2 ** (self.dtype.fracs))
-            return res_array
-        if isinstance(self.dtype, Int):
-            res_array = self.np_array.astype(np.int64)
-            return res_array
+        """
+        Convert HeteroCL array to numpy array / python list.
+        Int/UInt wider than 64 bits are returned as a python list;
+        every other case returns a numpy array.
+        """
+        # pylint: disable=no-else-return
         if isinstance(self.dtype, Float):
-            res_array = self.np_array.astype(float)
+            hcl_dtype_str = dtype_to_str(self.dtype)
+            np_dtype = np.dtype(hcl_dtype_str)
+            res_array = self.np_array.astype(np_dtype)
             return res_array
-        return self.np_array
+        elif isinstance(self.dtype, Int):
+            if self.dtype.bits > 64:
+                DTypeWarning(
+                    f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list"
+                ).warn()
+            return self._struct_np_array_to_int()
+        elif isinstance(self.dtype, UInt):
+            if self.dtype.bits > 64:
+                DTypeWarning(
+                    f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list"
+                ).warn()
+            return self._struct_np_array_to_int()
+        # TODO(Niansong): fixed/ufixed does not go through struct_np_array_to_int for now
+        # (needs an IR change, another PR), so they return numpy arrays despite the warning
+        elif isinstance(self.dtype, Fixed):
+            if self.dtype.bits > 64:
+                DTypeWarning(
+                    f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list"
+                ).warn()
+            # base_array = self._struct_np_array_to_int()
+            # return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs))
+            return self.np_array.astype(np.float64) / float(2 ** (self.dtype.fracs))
+        elif isinstance(self.dtype, UFixed):
+            if self.dtype.bits > 64:
+                DTypeWarning(
+                    f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list"
+                ).warn()
+            # base_array = self._struct_np_array_to_int()
+            # return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs))
+            return self.np_array.astype(np.float64) / float(2 ** (self.dtype.fracs))
+        else:
+            raise DTypeError(f"Unsupported data type {self.dtype}")
 
     def unwrap(self):
         return self.np_array
 
+    def _handle_overflow(self, array, dtype):
+        """
+        Wrap the values of `array` into the range of `dtype` (wrap mode).
+        If the dtype is wider than 64 bits, array should be a list of numbers.
+        """
+        # Data type check
+        if isinstance(dtype, Float):
+            if isinstance(array, list):
+                array = np.array(array)
+            hcl_dtype_str = dtype_to_str(dtype)
+            correct_dtype = np.dtype(hcl_dtype_str)
+            if array.dtype != correct_dtype:
+                array = array.astype(correct_dtype)
+        elif isinstance(dtype, Int):
+            sb = 1 << self.dtype.bits
+            sb_limit = 1 << (self.dtype.bits - 1)
+
+            def cast_func(x):
+                # recursive
+                if isinstance(x, list):
+                    return [cast_func(y) for y in x]
+                # signed integer overflow function: wrap mode
+                x = x % sb  # cap the value to the max value of the bitwidth
+                return x if x < sb_limit else x - sb
+
+            if isinstance(array, list):
+                array = [
+                    cast_func(x) for x in array
+                ]  # TODO: this should be tested independently
+            else:
+                array = np.vectorize(cast_func)(array).astype(np.int64)
+        elif isinstance(dtype, UInt):
+            # Handle overflow: wrap as Python ints, store as uint64 (not float/list)
+            sb = 1 << self.dtype.bits
+            array = np.vectorize(lambda x: int(x) % sb, otypes=[np.uint64])(array)
+        elif isinstance(dtype, Fixed):
+            # Handle overflow (a list input is converted to a float64 ndarray first)
+            sb = 1 << self.dtype.bits
+            sb_limit = 1 << (self.dtype.bits - 1)
+            array = np.asarray(array, dtype=np.float64)
+            array = array * (2**dtype.fracs)
+
+            def cast_func(x):
+                # recursive
+                if isinstance(x, list):
+                    return [cast_func(y) for y in x]
+                x = math.trunc(x) % sb  # rounds towards zero
+                # signed integer overflow function: wrap mode
+                return x if x < sb_limit else x - sb
+
+            if isinstance(array, list):
+                array = [cast_func(x) for x in array]
+            else:
+                array = np.vectorize(cast_func)(array).astype(np.int64)
+        elif isinstance(dtype, UFixed):
+            # Handle overflow (a list input is converted to a float64 ndarray first)
+            sb = 1 << self.dtype.bits
+            array = np.asarray(array, dtype=np.float64)
+            array = array * (2**dtype.fracs)
+
+            def cast_func(x):
+                # recursive
+                if isinstance(x, list):
+                    return [cast_func(y) for y in x]
+                x = math.trunc(x) % sb  # rounds towards zero
+                return x
+
+            if isinstance(array, list):
+                array = [cast_func(x) for x in array]
+            else:
+                array = np.vectorize(cast_func)(array).astype(np.int64)
+        else:
+            raise DTypeError("Type error: unrecognized type: " + str(self.dtype))
+        return array
+
+    def _struct_np_array_to_int(self):
+        """
+        Decode the byte-struct array into integers (little-endian).
+        Returns a numpy array if the bitwidth is at most 64, else a nested list.
+        """
+        signed = isinstance(self.dtype, (Int, Fixed))
+        # position of the most significant bit of the declared bitwidth
+        byte_idx = (self.dtype.bits - 1) // 8
+        bit_idx = (self.dtype.bits - 1) % 8
+
+        # each leaf element is a tuple of bytes, first field first
+        def to_int(x):
+            if isinstance(x, list):
+                return [to_int(y) for y in x]
+            # turn x from tuple to list so the bytes can be patched
+            x = list(x)
+            msb = (x[byte_idx] & (1 << bit_idx)) > 0
+            # sign extension: fill every bit above the MSB with ones
+            if signed and msb:
+                x[byte_idx] |= (0xFF << bit_idx) & 0xFF
+                for i in range(byte_idx + 1, len(x)):
+                    x[i] = 0xFF
+            return int.from_bytes(bytes(x), byteorder="little", signed=signed)
+
+        pylist = to_int(self.np_array.tolist())
+        if self.dtype.bits > 64:
+            return pylist
+        # unsigned 64-bit values >= 2**63 do not fit in int64; other widths keep int64
+        if self.dtype.bits == 64 and not signed:
+            return np.array(pylist, dtype=np.uint64)
+        return np.array(pylist, dtype=np.int64)
+
     def __repr__(self) -> str:
         return self.asnumpy().__repr__()
diff --git a/heterocl/utils.py b/heterocl/utils.py
index 68116a74..91418928 100644
--- a/heterocl/utils.py
+++ b/heterocl/utils.py
@@ -143,10 +143,6 @@ def make_const_tensor(val, dtype):
             np_dtype = np.int32
         elif dtype.bits <= 64:
             np_dtype = np.int64
-        elif dtype.bits <= 128:
-            np_dtype = np.int128
-        elif dtype.bits <= 256:
-            np_dtype = np.int256
         else:
             raise DTypeError(
                 f"Integer width ({dtype}) too large, not supported by numpy"
@@ -219,3 +215,80 @@ def get_max_value(dtype):
     if isinstance(dtype, UFixed):
         return (1 << dtype.bits) - 1
     raise DTypeError(f"Unrecognized data type: {dtype}")
+
+
+def make_anywidth_numpy_array(val, bitwidth):
+    """
+    Converts a numpy array to any target bitwidth.
+    ----------------
+    Parameters:
+    val: numpy.ndarray
+        numpy array, can be any numpy native bitwidth, e.g. np.int64
+    bitwidth: int
+        target bitwidth e.g. 9, 31, 198; if it needs more bytes than the
+        input elements have, each element is padded with 0x00 bytes
+        (val >= 0) or 0xFF bytes (val < 0)
+    ----------------
+    Returns:
+    numpy.ndarray
+        ceil(bitwidth / 8) one-byte fields per element; bit-packed uint8 if bitwidth == 1
+    """
+    shape = val.shape
+    sign_array = val >= 0
+    avail_bytes = val.itemsize  # number of bytes of each element
+    # The following code has several steps to convert the numpy array to have
+    # the correct data type in order to create an MLIR constant tensor.
+    # Since MLIR-NumPy Python interface only supports byte-addressable data types,
+    # we need to change the data type of the array to have the minimum number of bytes
+    # that can represent the target bitwidth.
+    # e.g., hcl.const_tensor(arr, dtype=hcl.Int(20)) (6*6 array)
+    #       which requires 20 bits (3 bytes) to represent each element
+    # declaration: 6*6*i20
+    # numpy input: 6*6*i64
+    # 1. Decompose the original i32 or i64 array into a structured array of uint8
+    #  -> decompose: 6*6*8*i8
+    # pylint: disable=no-else-return
+    # I think this if-else makes the code more readable
+    if bitwidth == 1:
+        return np.packbits(val, axis=None, bitorder="little")
+    else:
+        # Here we construct a customized NumPy dtype, "f0", "f1", "f2", etc.
+        # are the field names, and the entire data type is `val.dtype`.
+        # This can be viewed as a `union` type in C/C++.
+        # Please refer to the documentation for more details:
+        # https://numpy.org/doc/stable/reference/arrays.dtypes.html#specifying-and-constructing-data-types
+        decomposed_np_dtype = np.dtype(
+            (
+                val.dtype,
+                {f"f{i}": (np.uint8, i) for i in range(val.dtype.itemsize)},
+            )
+        )
+        val = val.view(decomposed_np_dtype)
+        # 2. Compose the uint8 array into a structured array of target bitwidth
+        # This is done by taking the first several bytes of the uint8 array
+        # "u1" means one unsigned byte, and "i1" means one signed byte
+        # f0 is LSB, fn is MSB (assumes a little-endian host -- TODO confirm)
+        n_bytes = int(np.ceil(bitwidth / 8))
+        new_dtype = np.dtype(
+            {
+                "names": [f"f{i}" for i in range(n_bytes)],
+                "formats": ["u1"] * n_bytes,
+                "offsets": list(range(n_bytes)),
+                "itemsize": n_bytes,
+            }
+        )
+        # sometimes the available bytes are not enough to represent the target bitwidth
+        # so that we need to pad the array
+        _bytes = [val[f"f{i}"] for i in range(min(avail_bytes, n_bytes))]
+        if avail_bytes < n_bytes:
+            padding = np.where(sign_array, 0x00, 0xFF).astype(np.uint8)
+            _bytes += [padding] * (n_bytes - avail_bytes)
+        # -> compose: 6*6*3*i8
+        val = np.stack(_bytes, axis=-1)
+        # -> flatten: 108*i8
+        val = val.flatten()
+        # -> view: 36*i24
+        val = val.view(np.dtype(new_dtype))
+        # -> reshape: 6*6*i24
+        val = val.reshape(shape)
+        return val
diff --git a/tests/test_dtype.py b/tests/test_dtype.py
index 7ff3a3f5..8100ac82 100644
--- a/tests/test_dtype.py
+++ b/tests/test_dtype.py
@@ -671,5 +671,35 @@ def cast(A):
                 assert False, "test failed, see failed test case above"
 
 
+def test_irregular_bitwidth_input():
+    """Check A + 1 for Int widths that are not numpy-native, including > 64 bits."""
+
+    def test_int(dtype):
+        hcl.init(dtype)
+        A = hcl.placeholder((10,), "A", dtype=dtype)
+        B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B")
+        s = hcl.create_schedule([A, B])
+        f = hcl.build(s)
+        # fixed seed: the same inputs are generated on every run, so a
+        # failure can be reproduced
+        A_np = np.random.default_rng(0).integers(-10, 10, A.shape)
+        A_hcl = hcl.asarray(A_np, dtype=dtype)
+        B_hcl = hcl.asarray(np.zeros(A.shape), dtype=dtype)
+        f(A_hcl, B_hcl)
+        B_np = B_hcl.asnumpy()
+        if dtype.bits <= 64:
+            golden = hcl.asarray(A_np + 1, dtype=dtype).asnumpy()
+            assert np.allclose(golden, B_np)
+        else:
+            # B_np is a list
+            golden = [x + 1 for x in A_np.tolist()]
+            for res, g in zip(B_np, golden):
+                assert res == g, f"res: {res}, hex: {hex(res)}; g: {g}, hex: {hex(g)}"
+
+    test_dtypes = [hcl.Int(2), hcl.Int(20), hcl.Int(63), hcl.Int(255), hcl.Int(512)]
+    for dtype in test_dtypes:
+        test_int(dtype)
+
+
 if __name__ == "__main__":
     pytest.main([__file__])