diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 9f92b7e22c6..a24e4364aae 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -930,14 +930,14 @@ def flatten(self, order="C"): Read the elements using this index order, and place the elements into the reshaped array using this index order. - - "C" means to read / write the elements using C-like index + - ``"C"`` means to read / write the elements using C-like index order, with the last axis index changing fastest, back to the first axis index changing slowest. - - "F" means to read / write the elements using Fortran-like + - ``"F"`` means to read / write the elements using Fortran-like index order, with the first index changing fastest, and the last index changing slowest. - The default is ``"C"``. + Default: ``"C"``. Returns ------- diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 565ef144d46..2c76b72d0da 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -242,7 +242,7 @@ def array( Default: ``True``. order : {"C", "F", "A", "K"}, optional Memory layout of the newly output array. - Default: "K". + Default: ``"K"``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -366,7 +366,8 @@ def asanyarray( used that can represent the values (by considering Promotion Type Rule and device capabilities when necessary). order : {None, "C", "F", "A", "K"}, optional - Memory layout of the newly output array. Default: "K". + Memory layout of the newly output array. + Default: ``"K"``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -470,7 +471,7 @@ def asarray( Default: ``None``. order : {None, "C", "F", "A", "K"}, optional Memory layout of the newly output array. - Default: "K". + Default: ``"K"``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -916,8 +917,9 @@ def diag(v, /, k=0, *, device=None, usm_type=None, sycl_queue=None): default values, returns a read/write view of its k-th diagonal. - Otherwise, returns a copy of its k-th diagonal. k : int, optional - Diagonal in question. The default is 0. Use k > 0 for diagonals above + Diagonal in question. Use k > 0 for diagonals above the main diagonal, and k < 0 for diagonals below the main diagonal. + Default: ``0``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -1123,7 +1125,8 @@ def empty( Default is the default floating point data type for the device where input array is allocated. order : {None, "C", "F"}, optional - Memory layout of the newly output array. Default: "C". + Memory layout of the newly output array. + Default: ``"C"``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -1225,7 +1228,8 @@ def empty_like( Default is the default floating point data type for the device where input array is allocated. order : {None, "C", "F"}, optional - Memory layout of the newly output array. Default: "C". + Memory layout of the newly output array. + Default: ``"C"``. shape : {None, int, sequence of ints} Overrides the shape of the result. device : {None, string, SyclDevice, SyclQueue}, optional @@ -1342,7 +1346,8 @@ def eye( Default is the default floating point data type for the device where input array is allocated. order : {None, "C", "F"}, optional - Memory layout of the newly output array. Default: "C". + Memory layout of the newly output array. + Default: ``"C"``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -1453,7 +1458,8 @@ def frombuffer( count : int, optional Number of items to read. ``-1`` means all data in the buffer. offset : int, optional - Start reading the buffer from this offset (in bytes); default: 0. + Start reading the buffer from this offset (in bytes). + Default: ``0``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -1981,7 +1987,8 @@ def full( Default is the default floating point data type for the device where input array is allocated. order : {None, "C", "F"}, optional - Memory layout of the newly output array. Default: "C". + Memory layout of the newly output array. + Default: ``"C"``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -2086,7 +2093,8 @@ def full_like( Default is the default floating point data type for the device where input array is allocated. order : {None, "C", "F"}, optional - Memory layout of the newly output array. Default: "C". + Memory layout of the newly output array. + Default: ``"C"``. shape : {None, int, sequence of ints} Overrides the shape of the result. device : {None, string, SyclDevice, SyclQueue}, optional @@ -2202,7 +2210,8 @@ def geomspace( ``num + 1`` values are spaced over the interval in log-space, of which all but the last (a sequence of length `num`) are returned. num : int, optional - Number of samples to generate. Default is 50. + Number of samples to generate. + Default: ``50``. dtype : {None, dtype}, optional The desired dtype for the array. If not given, a default dtype will be used that can represent the values (by considering Promotion Type Rule @@ -2221,7 +2230,7 @@ def geomspace( A SYCL queue to use for output array allocation and copying. endpoint : bool, optional If ``True``, `stop` is the last sample. Otherwise, it is not included. - Default is ``True``. + Default: ``True``. axis : int, optional The axis in the result to store the samples. Relevant only if start or stop are array-like. By default (0), the samples will be along a new @@ -2446,7 +2455,7 @@ def linspace( A SYCL queue to use for output array allocation and copying. endpoint : bool, optional If ``True``, `stop` is the last sample. Otherwise, it is not included. - Default is ``True``. + Default: ``True``. retstep : bool, optional If ``True``, return (samples, step), where step is the spacing between samples. @@ -2653,7 +2662,8 @@ def logspace( values are spaced over the interval in log-space, of which all but the last (a sequence of length `num`) are returned. num : int, optional - Number of samples to generate. Default is 50. + Number of samples to generate. + Default: ``50``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -2668,7 +2678,7 @@ def logspace( A SYCL queue to use for output array allocation and copying. endpoint : {bool}, optional If ``True``, stop is the last sample. Otherwise, it is not included. - Default is ``True``. + Default: ``True``. base : {array_like}, optional Input data, in any form that can be converted to an array. This includes scalars, lists, lists of tuples, tuples, tuples of tuples, @@ -2676,7 +2686,8 @@ def logspace( that can be converted to an array.This includes scalars, lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. The `step` size between the elements in ``ln(samples) / ln(base)`` - (or log_base(samples)) is uniform. Default is 10.0. + (or log_base(samples)) is uniform. + Default: ``10.0``. dtype : {None, dtype}, optional The desired dtype for the array. If not given, a default dtype will be used that can represent the values (by considering Promotion Type Rule @@ -3020,7 +3031,8 @@ def ones( Default is the default floating point data type for the device where input array is allocated. order : {None, "C", "F"}, optional - Memory layout of the newly output array. Default: "C". + Memory layout of the newly output array. + Default: ``"C"``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -3128,7 +3140,8 @@ def ones_like( Default is the default floating point data type for the device where input array is allocated. order : {None, "C", "F"}, optional - Memory layout of the newly output array. Default: "C". + Memory layout of the newly output array. + Default: ``"C"``. shape : {None, int, sequence of ints} Overrides the shape of the result. device : {None, string, SyclDevice, SyclQueue}, optional @@ -3300,7 +3313,7 @@ def tri( k : int, optional The sub-diagonal at and below which the array is filled. k = 0 is the main diagonal, while k < 0 is below it, and k > 0 is above. - The default is 0. + Default: ``0``. dtype : {None, dtype}, optional The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point data type for the device where @@ -3663,11 +3676,12 @@ def zeros( shape : {int, sequence of ints} Shape of the new array, e.g., (2, 3) or 2. dtype : {None, dtype}, optional - The desired dtype for the array, e.g., dpnp.int32. + The desired dtype for the array, e.g., `dpnp.int32`. Default is the default floating point data type for the device where input array is allocated. order : {None, "C", "F"}, optional - Memory layout of the newly output array. Default: "C". + Memory layout of the newly output array. + Default: ``"C"``. device : {None, string, SyclDevice, SyclQueue}, optional An array API concept of device where the output array is created. The `device` can be ``None`` (the default), an OneAPI filter selector @@ -3775,7 +3789,8 @@ def zeros_like( Default is the default floating point data type for the device where input array is allocated. order : {None, "C", "F"}, optional - Memory layout of the newly output array. Default: "C". + Memory layout of the newly output array. + Default: ``"C"``. shape : {None, int, sequence of ints} Overrides the shape of the result. device : {None, string, SyclDevice, SyclQueue}, optional diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index d2a03301f41..bc821be545d 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -163,11 +163,16 @@ def dot(a, b, out=None): def einsum( - *operands, out=None, dtype=None, order="K", casting="safe", optimize=False + *operands, + out=None, + dtype=None, + order="K", + casting="same_kind", + optimize=False, ): """ einsum(subscripts, *operands, out=None, dtype=None, order="K", \ - casting="safe", optimize=False) + casting="same_kind", optimize=False) Evaluates the Einstein summation convention on the operands. @@ -186,14 +191,14 @@ def einsum( If provided, the calculation is done into this array. dtype : {dtype, None}, optional If provided, forces the calculation to use the data type specified. - Default is ``None``. + Default: ``None``. order : {"C", "F", "A", "K"}, optional Controls the memory layout of the output. ``"C"`` means it should be C-contiguous. ``"F"`` means it should be F-contiguous, ``"A"`` means it should be ``"F"`` if the inputs are all ``"F"``, ``"C"`` otherwise. ``"K"`` means it should be as close to the layout as the inputs as is possible, including arbitrarily permuted axes. - Default is ``"K"``. + Default: ``"K"``. casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional Controls what kind of data casting may occur. Setting this to ``"unsafe"`` is not recommended, as it can adversely affect @@ -206,12 +211,17 @@ def einsum( like float64 to float32, are allowed. * ``"unsafe"`` means any data conversions may be done. - Default is ``"safe"``. + Please note that, in contrast to NumPy, the default setting here is + ``"same_kind"``. This is to prevent errors that may occur when data + needs to be converted to `float64`, but the device does not support it. + In such cases, the data is instead converted to `float32`. + Default: ``"same_kind"``. optimize : {False, True, "greedy", "optimal"}, optional Controls if intermediate optimization should occur. No optimization will occur if ``False`` and ``True`` will default to the ``"greedy"`` algorithm. Also accepts an explicit contraction list from the - :obj:`dpnp.einsum_path` function. Default is ``False``. + :obj:`dpnp.einsum_path` function. + Default: ``False``. Returns ------- @@ -453,7 +463,7 @@ def einsum_path(*operands, optimize="greedy", einsum_call=False): the number of terms in the contraction. Equivalent to the ``"optimal"`` path for most contractions. - Default is ``"greedy"``. + Default: ``"greedy"``. Returns ------- @@ -736,16 +746,18 @@ def matmul( out : {None, dpnp.ndarray, usm_ndarray}, optional Alternative output array in which to place the result. It must have a shape that matches the signature `(n,k),(k,m)->(n,m)` but the type - (of the calculated values) will be cast if necessary. Default: ``None``. + (of the calculated values) will be cast if necessary. + Default: ``None``. dtype : {None, dtype}, optional Type to use in computing the matrix product. By default, the returned array will have data type that is determined by considering Promotion Type Rule and device capabilities. casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional - Controls what kind of data casting may occur. Default: ``"same_kind"``. + Controls what kind of data casting may occur. + Default: ``"same_kind"``. order : {"C", "F", "A", "K", None}, optional Memory layout of the newly output array, if parameter `out` is ``None``. - Default: "K". + Default: ``"K"``. axes : {list of tuples}, optional A list of tuples with indices of axes the matrix product should operate on. For instance, for the signature of ``(i,j),(j,k)->(i,k)``, the base diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 2f2b2c7f23a..f3eb9c1ba0e 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -772,7 +772,8 @@ def concatenate( corresponding to axis (the first, by default). axis : int, optional The axis along which the arrays will be joined. If axis is ``None``, - arrays are flattened before use. Default is 0. + arrays are flattened before use. + Default: ``0``. out : dpnp.ndarray, optional If provided, the destination to place the result. The shape must be correct, matching that of what concatenate would have returned diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py index 4c4576dc996..19248342376 100644 --- a/dpnp/dpnp_iface_searching.py +++ b/dpnp/dpnp_iface_searching.py @@ -314,17 +314,17 @@ def searchsorted(a, v, side="left", sorter=None): sort it. v : {dpnp.ndarray, usm_ndarray, scalar} Values to insert into `a`. - side : {'left', 'right'}, optional - If ``'left'``, the index of the first suitable location found is given. - If ``'right'``, return the last such index. If there is no suitable + side : {"left", "right"}, optional + If ``"left"``, the index of the first suitable location found is given. + If ``"right"``, return the last such index. If there is no suitable index, return either 0 or N (where N is the length of `a`). - Default is ``'left'``. + Default: ``"left"``. sorter : {dpnp.ndarray, usm_ndarray}, optional Optional 1-D array of integer indices that sort array a into ascending order. They are typically the result of :obj:`dpnp.argsort`. - Out of bound index values of `sorter` array are treated using `"wrap"` - mode documented in :py:func:`dpnp.take`. - Default is ``None``. + Out of bound index values of `sorter` array are treated using + ``"wrap"`` mode documented in :py:func:`dpnp.take`. + Default: ``None``. Returns ------- diff --git a/dpnp/dpnp_iface_sorting.py b/dpnp/dpnp_iface_sorting.py index 3e4f868af8c..c007def058b 100644 --- a/dpnp/dpnp_iface_sorting.py +++ b/dpnp/dpnp_iface_sorting.py @@ -94,7 +94,7 @@ def argsort(a, axis=-1, kind=None, order=None): sorting. The default is -1, which sorts along the last axis. kind : {None, "stable"}, optional Default is ``None``, which is equivalent to `"stable"`. - Unlike in NumPy any other options are not accepted here. + Unlike NumPy, no other option is accepted here. Returns ------- diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 46d977df030..d663d9d1836 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -188,16 +188,18 @@ def average(a, axis=None, weights=None, returned=False, *, keepdims=False): The only constraint on `weights` is that `sum(weights)` must not be 0. returned : {bool}, optional - Default is ``False``. If ``True``, the tuple (`average`, - `sum_of_weights`) is returned, otherwise only the average is returned. - If `weights=None`, `sum_of_weights` is equivalent to the number of - elements over which the average is taken. + If ``True``, the tuple (`average`, `sum_of_weights`) is returned, + otherwise only the average is returned. If `weights=None`, + `sum_of_weights` is equivalent to the number of elements over which + the average is taken. + Default: ``False``. keepdims : {None, bool}, optional If ``True``, the reduced axes (dimensions) are included in the result as singleton dimensions, so that the returned array remains compatible with the input array according to Array Broadcasting rules. Otherwise, if ``False``, the reduced axes are not included in - the returned array. Default: ``False``. + the returned array. + Default: ``False``. Returns ------- diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py index 68b9fcaf13a..0dff6fdb292 100644 --- a/dpnp/dpnp_iface_trigonometric.py +++ b/dpnp/dpnp_iface_trigonometric.py @@ -2206,10 +2206,10 @@ def unwrap(p, discont=None, axis=-1, *, period=2 * dpnp.pi): p : {dpnp.ndarray, usm_ndarray} Input array. discont : {float, None}, optional - Maximum discontinuity between values, default is ``period / 2``. Values - below ``period / 2`` are treated as if they were ``period / 2``. To - have an effect different from the default, `discont` should be larger - than ``period / 2``. + Maximum discontinuity between values, default is ``None`` which is an + alias for ``period / 2``. Values below ``period / 2`` are treated as if + they were ``period / 2``. To have an effect different from the default, + `discont` should be larger than ``period / 2``. Default: ``None``. axis : int, optional Axis along which unwrap will operate, default is the last axis. diff --git a/dpnp/dpnp_utils/dpnp_utils_einsum.py b/dpnp/dpnp_utils/dpnp_utils_einsum.py index 5ba3df23ed2..5439230ecfb 100644 --- a/dpnp/dpnp_utils/dpnp_utils_einsum.py +++ b/dpnp/dpnp_utils/dpnp_utils_einsum.py @@ -35,39 +35,14 @@ import dpnp from dpnp.dpnp_utils import get_usm_allocations +from ..dpnp_array import dpnp_array + _einsum_symbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" __all__ = ["dpnp_einsum"] -def _calc_offset(shape, linear_id, strides): - """ - Calculate the offset in a multi-dimensional array given the shape, linear_id, and strides. - - Parameters - ---------- - shape : tuple - The shape of the multi-dimensional array. - linear_id : int - The linear index in the multi-dimensional array. - strides : tuple - The strides of the multi-dimensional array. - - Returns - ------- - out : int - The offset in the multi-dimensional array. - - """ - - offset = 0 - indices = _index_linear_to_tuple(shape, linear_id) - for i in range(len(indices)): - offset += indices[i] * strides[i] - return offset - - def _chr(label): """ Copied from _chr in cupy/core/_einsum.py @@ -89,7 +64,7 @@ def _chr(label): Examples -------- - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> np_util._chr(97) 'a' >>> np_util._chr(-1) @@ -123,7 +98,7 @@ def _compute_size_by_dict(indices, idx_dict): Examples -------- - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> np_util._compute_size_by_dict("abbc", {"a": 2, "b":3, "c":5}) 90 @@ -182,7 +157,7 @@ def _einsum_diagonals(input_subscripts, operands): Examples -------- >>> import dpnp as np - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> a = np.arange(9).reshape(3, 3) >>> input_subscripts = ["ii"] >>> operands = [a] @@ -246,7 +221,7 @@ def _expand_dims_transpose(arr, mode, mode_out): Example ------- >>> import dpnp - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> a = dpnp.zeros((10, 20)) >>> mode_a = ("A", "B") >>> mode_out = ("B", "C", "A") @@ -296,7 +271,7 @@ def _find_contraction(positions, input_sets, output_set): Examples -------- # A simple dot product test case - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> pos = (0, 1) >>> isets = [set("ab"), set("bc")] >>> oset = set("ac") @@ -352,7 +327,7 @@ def _flatten_transpose(a, axeses): Examples -------- >>> import dpnp as np - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> a = np.arange(24).reshape(2, 3, 4) >>> axeses = [(0, 2), (1,)] >>> out, shapes = np_util._flatten_transpose(a, axeses) @@ -400,7 +375,7 @@ def _flop_count(idx_contraction, inner, num_terms, size_dictionary): Examples -------- - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> np_util._flop_count("abc", False, 1, {"a": 2, "b":3, "c":5}) 30 @@ -447,7 +422,7 @@ def _greedy_path(input_sets, output_set, idx_dict, memory_limit): Examples -------- - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> isets = [set("abd"), set("ac"), set("bdc")] >>> oset = set("") >>> idx_sizes = {"a": 1, "b":2, "c":3, "d":4} @@ -535,34 +510,6 @@ def _greedy_path(input_sets, output_set, idx_dict, memory_limit): return path -def _index_linear_to_tuple(shape, linear_id): - """ - Convert a linear index to a tuple of indices in a multi-dimensional array. - - Parameters - ---------- - shape : tuple - The shape of the multi-dimensional array. - linear_id : int - The linear index to convert. - - Returns - ------- - out: tuple - A tuple of indices corresponding to the linear index. - - """ - - len_shape = len(shape) - indices = [0] * len_shape - for i in range(len_shape): - prod_res = _compute_size(i + 1, shape) - indices[i] = linear_id // prod_res - linear_id %= prod_res - - return tuple(indices) - - def _iter_path_pairs(path): """ Copied from _iter_path_pairs in cupy/core/_einsum.py @@ -636,7 +583,7 @@ def _optimal_path(input_sets, output_set, idx_dict, memory_limit): Examples -------- - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> isets = [set("abd"), set("ac"), set("bdc")] >>> oset = set("") >>> idx_sizes = {"a": 1, "b":2, "c":3, "d":4} @@ -709,7 +656,7 @@ def _parse_einsum_input(args): The operand list is simplified to reduce printing: >>> import dpnp as np - >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> import dpnp.dpnp_utils.dpnp_utils_einsum as np_util >>> a = np.random.rand(4, 4) >>> b = np.random.rand(4, 4, 4) >>> np_util._parse_einsum_input(("...a,...a->...", a, b)) @@ -853,6 +800,12 @@ def _parse_ellipsis_subscript(subscript, idx, ndim=None, ellipsis_len=None): def _parse_int_subscript(list_subscript): """Copied from _parse_int_subscript in cupy/core/_einsum.py""" + if not isinstance( + list_subscript, (list, tuple, numpy.ndarray, dpnp.ndarray) + ): + raise TypeError( + "subscripts for each operand must be a list, tuple or ndarray." + ) str_subscript = "" for s in list_subscript: if s is Ellipsis: @@ -865,6 +818,11 @@ def _parse_int_subscript(list_subscript): "For this input type lists must contain " "either int or Ellipsis" ) from e + if isinstance(s, int): + if not 0 <= s < len(_einsum_symbols): + raise ValueError( + f"subscript is not within the valid range [0, {len(_einsum_symbols)})." + ) str_subscript += _einsum_symbols[s] return str_subscript @@ -1012,8 +970,14 @@ def _transpose_ex(a, axeses): strides.append(stride) # TODO: replace with a.view() when it is implemented in dpnp - a = _view_work_around(a, shape, strides) - return a + return dpnp_array( + shape, + dtype=a.dtype, + buffer=a, + strides=strides, + usm_type=a.usm_type, + sycl_queue=a.sycl_queue, + ) def _tuple_sorted_by_0(zs): @@ -1063,39 +1027,6 @@ def _update_other_results(results, best): return mod_results -def _view_work_around(a, shape, strides): - """ - Create a copy of the input array with the specified shape and strides. - - Parameters - ---------- - a : {dpnp.ndarray, usm_ndarray} - The input array. - shape : tuple - The desired shape of the output array. - strides : tuple - The desired strides of the output array. - - Returns - ------- - out : dpnp.ndarray - A copy of the input array with the specified shape and strides. - - """ - - n_size = numpy.prod(shape) - b = dpnp.empty( - n_size, dtype=a.dtype, usm_type=a.usm_type, sycl_queue=a.sycl_queue - ) - for linear_id in range(n_size): - offset = _calc_offset(shape, linear_id, strides) - indices = _index_linear_to_tuple(a.shape, offset) - b[linear_id] = a[indices] - b = b.reshape(tuple(shape)) - - return b - - def dpnp_einsum( *operands, out=None, dtype=None, order="K", casting="safe", optimize=False ): @@ -1118,12 +1049,18 @@ def dpnp_einsum( raise ExecutionPlacementError( "Input and output allocation queues are not compatible" ) + result_dtype = dpnp.result_type(*arrays) if dtype is None else dtype for id, a in enumerate(operands): if dpnp.isscalar(a): operands[id] = dpnp.array( a, dtype=result_dtype, usm_type=res_usm_type, sycl_queue=exec_q ) + result_dtype = dpnp.result_type(*arrays) if dtype is None else dtype + if order in ["a", "A"]: + order = ( + "F" if not any(arr.flags.c_contiguous for arr in arrays) else "C" + ) input_subscripts = [ _parse_ellipsis_subscript(sub, idx, ndim=arr.ndim) @@ -1241,9 +1178,7 @@ def dpnp_einsum( operands = [a for a in operands] else: operands = [ - dpnp.astype( - a, result_dtype, copy=False, casting=casting, order=order - ) + dpnp.astype(a, result_dtype, copy=False, casting=casting) for a in operands ] @@ -1309,5 +1244,7 @@ def dpnp_einsum( arr_out = arr0.transpose(transpose_axes).reshape( [dimension_dict[label] for label in output_subscript] ) + + arr_out = dpnp.asarray(arr_out, order=order) assert returns_view or arr_out.dtype == result_dtype - return dpnp.get_result_array(arr_out, out) + return dpnp.get_result_array(arr_out, out, casting=casting) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 221bfd29c51..5951f103b17 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -9,6 +9,8 @@ assert_almost_equal, assert_array_equal, assert_raises, + assert_raises_regex, + suppress_warnings, ) import dpnp as inp @@ -695,39 +697,1048 @@ def test_einsum_trivial_cases(self): def test_einsum_out(self): a = inp.ones((5, 5)) - a_np = a.asnumpy() out = inp.empty((5,)) - out_np = out.asnumpy() result = inp.einsum("ii->i", a, out=out) assert result is out - expected = numpy.einsum("ii->i", a_np, out=out_np) + expected = numpy.einsum("ii->i", a.asnumpy()) assert_dtype_allclose(result, expected) - def test_einsum_error(self): - a = inp.ones((5, 5)) - # unknown keyword argument - with pytest.raises(TypeError): - inp.einsum("ii->i", a, copy=False) - + def test_einsum_error1(self): a = inp.ones((5, 5)) out = inp.empty((5,), sycl_queue=dpctl.SyclQueue()) # inconsistent sycl_queue - with pytest.raises(ExecutionPlacementError): - inp.einsum("ii->i", a, out=out) + assert_raises(ExecutionPlacementError, inp.einsum, "ii->i", a, out=out) # unknown value for optimize keyword - with pytest.raises(TypeError): - inp.einsum("ii->i", a, optimize="average") + assert_raises(TypeError, inp.einsum, "ii->i", a, optimize="blah") + + # repeated scripts in output + assert_raises(ValueError, inp.einsum, "ij,jk->ii", a, a) a = inp.ones((5, 4)) # different size for same label 5 != 4 - with pytest.raises(ValueError): - inp.einsum("ii", a) + assert_raises(ValueError, inp.einsum, "ii", a) + + @pytest.mark.parametrize("do_opt", [True, False]) + @pytest.mark.parametrize("xp", [numpy, inp]) + def test_einsum_error2(self, do_opt, xp): + a = xp.asarray(0) + b = xp.asarray([0]) + c = xp.asarray([0, 0]) + d = xp.asarray([[0, 0], [0, 0]]) + + # Need enough arguments + assert_raises(ValueError, xp.einsum, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "", optimize=do_opt) + + # subscripts must be a string + assert_raises(TypeError, xp.einsum, a, 0, optimize=do_opt) + + # this call returns a segfault + assert_raises(TypeError, xp.einsum, *(None,) * 63, optimize=do_opt) + + # number of operands must match count in subscripts string + assert_raises(ValueError, xp.einsum, "", a, 0, optimize=do_opt) + assert_raises(ValueError, xp.einsum, ",", 0, b, b, optimize=do_opt) + assert_raises(ValueError, xp.einsum, ",", b, optimize=do_opt) + + # can't have more subscripts than dimensions in the operand + assert_raises(ValueError, xp.einsum, "i", a, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "ij", c, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "...i", a, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "i...j", c, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "i...", a, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "ij...", c, optimize=do_opt) + + # invalid ellipsis + assert_raises(ValueError, xp.einsum, "i..", c, optimize=do_opt) + assert_raises(ValueError, xp.einsum, ".i...", c, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "j->..j", c, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "j->.j...", c, optimize=do_opt) + + # invalid subscript character + assert_raises(ValueError, xp.einsum, "i%...", c, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "...j$", c, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "i->&", c, optimize=do_opt) + + # output subscripts must appear in input + assert_raises(ValueError, xp.einsum, "i->ij", c, optimize=do_opt) + + # output subscripts may only be specified once + assert_raises(ValueError, xp.einsum, "ij->jij", d, optimize=do_opt) + + # dimensions must match when being collapsed + a = xp.arange(6).reshape(2, 3) + assert_raises(ValueError, xp.einsum, "ii", a, optimize=do_opt) + assert_raises(ValueError, xp.einsum, "ii->i", a, optimize=do_opt) + + with assert_raises_regex(ValueError, "'b'"): + # 'c' erroneously appeared in the error message + a = xp.ones((3, 3, 4, 5, 6)) + b = xp.ones((3, 4, 5)) + xp.einsum("aabcb,abc", a, b) + + @pytest.mark.parametrize("do_opt", [True, False]) + @pytest.mark.parametrize("xp", [numpy, inp]) + def test_einsum_specific_errors(self, do_opt, xp): + a = xp.asarray(0) + # out parameter must be an array + assert_raises(TypeError, xp.einsum, "", a, out="test", optimize=do_opt) + + # order parameter must be a valid order + assert_raises(ValueError, xp.einsum, "", a, order="W", optimize=do_opt) + + # other keyword arguments are rejected + assert_raises(TypeError, xp.einsum, "", a, bad_arg=0, optimize=do_opt) + + # broadcasting to new dimensions must be enabled explicitly + a = xp.arange(6).reshape(2, 3) + b = xp.asarray([[0, 1], [0, 1]]) + assert_raises(ValueError, xp.einsum, "i", a, optimize=do_opt) + out = xp.arange(4).reshape(2, 2) + assert_raises( + ValueError, xp.einsum, "i->i", b, out=out, optimize=do_opt + ) - a = inp.ones((5, 5)) - # repeated scripts in output - with pytest.raises(ValueError): - inp.einsum("ij,jk->ii", a, a) + # Check order kwarg, asanyarray allows 1d to pass through + a = xp.arange(6).reshape(-1, 1) + assert_raises( + ValueError, xp.einsum, "i->i", a, optimize=do_opt, order="d" + ) + + def check_einsum_sums(self, dtype, do_opt=False): + dtype = numpy.dtype(dtype) + # Check various sums. Does many sizes to exercise unrolled loops. + + # sum(a, axis=-1) + for n in range(1, 17): + a = numpy.arange(n, dtype=dtype) + a_dp = inp.array(a) + expected = numpy.einsum("i->", a, optimize=do_opt) + assert_dtype_allclose( + inp.einsum("i->", a_dp, optimize=do_opt), expected + ) + assert_dtype_allclose( + inp.einsum(a_dp, [0], [], optimize=do_opt), expected + ) + + for n in range(1, 17): + a = numpy.arange(2 * 3 * n, dtype=dtype).reshape(2, 3, n) + a_dp = inp.array(a) + expected = numpy.einsum("...i->...", a, optimize=do_opt) + result = inp.einsum("...i->...", a_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum( + a_dp, [Ellipsis, 0], [Ellipsis], optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + # sum(a, axis=0) + for n in range(1, 17): + a = numpy.arange(2 * n, dtype=dtype).reshape(2, n) + a_dp = inp.array(a) + expected = numpy.einsum("i...->...", a, optimize=do_opt) + result = inp.einsum("i...->...", a_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum( + a_dp, [0, Ellipsis], [Ellipsis], optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + for n in range(1, 17): + a = numpy.arange(2 * 3 * n, dtype=dtype).reshape(2, 3, n) + a_dp = inp.array(a) + expected = numpy.einsum("i...->...", a, optimize=do_opt) + result = inp.einsum("i...->...", a_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum( + a_dp, [0, Ellipsis], [Ellipsis], optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + # trace(a) + for n in range(1, 17): + a = numpy.arange(n * n, dtype=dtype).reshape(n, n) + a_dp = inp.array(a) + expected = numpy.einsum("ii", a, optimize=do_opt) + result = inp.einsum("ii", a_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum(a_dp, [0, 0], optimize=do_opt) + assert_dtype_allclose(result, expected) + + # should accept dpnp array in subscript list + dp_array = inp.asarray([0, 0]) + assert_dtype_allclose( + inp.einsum(a_dp, dp_array, optimize=do_opt), expected + ) + assert_dtype_allclose( + inp.einsum(a_dp, list(dp_array), optimize=do_opt), expected + ) + + # multiply(a, b) + for n in range(1, 17): + a = numpy.arange(3 * n, dtype=dtype).reshape(3, n) + b = numpy.arange(2 * 3 * n, dtype=dtype).reshape(2, 3, n) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("..., ...", a, b, optimize=do_opt) + result = inp.einsum("..., ...", a_dp, b_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum( + a_dp, [Ellipsis], b_dp, [Ellipsis], optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + # inner(a,b) + for n in range(1, 17): + a = numpy.arange(2 * 3 * n, dtype=dtype).reshape(2, 3, n) + b = numpy.arange(n, dtype=dtype) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("...i, ...i", a, b, optimize=do_opt) + result = inp.einsum("...i, ...i", a_dp, b_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum( + a_dp, [Ellipsis, 0], b_dp, [Ellipsis, 0], optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + for n in range(1, 11): + a = numpy.arange(n * 3 * 2, dtype=dtype).reshape(n, 3, 2) + b = numpy.arange(n, dtype=dtype) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("i..., i...", a, b, optimize=do_opt) + result = inp.einsum("i..., i...", a_dp, b_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum( + a_dp, [0, Ellipsis], b_dp, [0, Ellipsis], optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + # outer(a,b) + for n in range(1, 17): + a = numpy.arange(3, dtype=dtype) + 1 + b = numpy.arange(n, dtype=dtype) + 1 + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("i,j", a, b, optimize=do_opt) + assert_dtype_allclose( + inp.einsum("i,j", a_dp, b_dp, optimize=do_opt), expected + ) + assert_dtype_allclose( + inp.einsum(a_dp, [0], b_dp, [1], optimize=do_opt), expected + ) + + # Suppress the complex warnings for the 'as f8' tests + with suppress_warnings() as sup: + sup.filter(numpy.exceptions.ComplexWarning) + + # matvec(a,b) / a.dot(b) where a is matrix, b is vector + for n in range(1, 17): + a = numpy.arange(4 * n, dtype=dtype).reshape(4, n) + b = numpy.arange(n, dtype=dtype) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("ij, j", a, b, optimize=do_opt) + result = inp.einsum("ij, j", a_dp, b_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum(a_dp, [0, 1], b_dp, [1], optimize=do_opt) + assert_dtype_allclose(result, expected) + + c = inp.arange(4, dtype=a_dp.dtype) + args = ["ij, j", a_dp, b_dp] + result = inp.einsum( + *args, out=c, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is c + assert_dtype_allclose(result, expected) + + c[...] = 0 + args = [a_dp, [0, 1], b_dp, [1]] + result = inp.einsum( + *args, out=c, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is c + assert_dtype_allclose(result, expected) + + for n in range(1, 17): + a = numpy.arange(4 * n, dtype=dtype).reshape(4, n) + b = numpy.arange(n, dtype=dtype) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("ji,j", a.T, b.T, optimize=do_opt) + result = inp.einsum("ji,j", a_dp.T, b_dp.T, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum( + a_dp.T, [1, 0], b_dp.T, [1], optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + c = inp.arange(4, dtype=a_dp.dtype) + args = ["ji,j", a_dp.T, b_dp.T] + result = inp.einsum( + *args, out=c, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is c + assert_dtype_allclose(result, expected) + + c[...] = 0 + args = [a_dp.T, [1, 0], b_dp.T, [1]] + result = inp.einsum( + *args, out=c, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is c + assert_dtype_allclose(result, expected) + + # matmat(a,b) / a.dot(b) where a is matrix, b is matrix + for n in range(1, 17): + a = numpy.arange(4 * n, dtype=dtype).reshape(4, n) + b = numpy.arange(n * 6, dtype=dtype).reshape(n, 6) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("ij, jk", a, b, optimize=do_opt) + result = inp.einsum("ij, jk", a_dp, b_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum(a_dp, [0, 1], b_dp, [1, 2], optimize=do_opt) + assert_dtype_allclose(result, expected) + + for n in range(1, 17): + a = numpy.arange(4 * n, dtype=dtype).reshape(4, n) + b = numpy.arange(n * 6, dtype=dtype).reshape(n, 6) + c = numpy.arange(24, dtype=dtype).reshape(4, 6) + a_dp = inp.array(a) + b_dp = inp.array(b) + d = inp.array(c) + args = ["ij, jk", a, b] + expected = numpy.einsum( + *args, out=c, dtype="f4", casting="unsafe", optimize=do_opt + ) + args = ["ij, jk", a_dp, b_dp] + result = inp.einsum( + *args, out=d, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is d + assert_dtype_allclose(result, expected) + + d[...] = 0 + args = [a_dp, [0, 1], b_dp, [1, 2]] + result = inp.einsum( + *args, out=d, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is d + assert_dtype_allclose(result, expected) + + # matrix triple product + a = numpy.arange(12, dtype=dtype).reshape(3, 4) + b = numpy.arange(20, dtype=dtype).reshape(4, 5) + c = numpy.arange(30, dtype=dtype).reshape(5, 6) + a_dp = inp.array(a) + b_dp = inp.array(b) + c_dp = inp.array(c) + # equivalent of a.dot(b).dot(c) + # if optimize is True, NumPy does not respect the given dtype + args = ["ij,jk,kl", a, b, c] + expected = numpy.einsum( + *args, dtype="f4", casting="unsafe", optimize=False + ) + args = ["ij,jk,kl", a_dp, b_dp, c_dp] + result = inp.einsum( + *args, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + args = a_dp, [0, 1], b_dp, [1, 2], c_dp, [2, 3] + result = inp.einsum( + *args, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + d = numpy.arange(18, dtype=dtype).reshape(3, 6) + d_dp = inp.array(d) + args = ["ij,jk,kl", a, b, c] + expected = numpy.einsum( + *args, out=d, dtype="f4", casting="unsafe", optimize=do_opt + ) + args = ["ij,jk,kl", a_dp, b_dp, c_dp] + result = inp.einsum( + *args, out=d_dp, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is d_dp + assert_dtype_allclose(result, expected) + + d_dp[...] = 0 + args = [a_dp, [0, 1], b_dp, [1, 2], c_dp, [2, 3]] + result = inp.einsum( + *args, out=d_dp, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is d_dp + assert_dtype_allclose(result, expected) + + # tensordot(a, b) + a = numpy.arange(60, dtype=dtype).reshape(3, 4, 5) + b = numpy.arange(24, dtype=dtype).reshape(4, 3, 2) + a_dp = inp.array(a) + b_dp = inp.array(b) + # equivalent of numpy.tensordot(a, b, axes=([1, 0], [0, 1])) + expected = numpy.einsum("ijk, jil -> kl", a, b, optimize=do_opt) + result = inp.einsum("ijk, jil -> kl", a_dp, b_dp, optimize=do_opt) + assert_dtype_allclose(result, expected) + + result = inp.einsum( + a_dp, [0, 1, 2], b_dp, [1, 0, 3], optimize=do_opt + ) + assert_dtype_allclose(result, expected) + + c = inp.arange(10, dtype=a_dp.dtype).reshape(5, 2) + args = ["ijk, jil -> kl", a_dp, b_dp] + result = inp.einsum( + *args, out=c, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is c + assert_dtype_allclose(result, expected) + + c[...] = 0 + args = [a_dp, [0, 1, 2], b_dp, [1, 0, 3]] + result = inp.einsum( + *args, out=c, dtype="f4", casting="unsafe", optimize=do_opt + ) + assert result is c + assert_dtype_allclose(result, expected) + + # logical_and(logical_and(a!=0, b!=0), c!=0) + neg_val = -2 if dtype.kind != "u" else numpy.iinfo(dtype).max - 1 + a = numpy.array([1, 3, neg_val, 0, 12, 13, 0, 1], dtype=dtype) + b = numpy.array([0, 3.5, 0, neg_val, 0, 1, 3, 12], dtype=dtype) + c = numpy.array([True, True, False, True, True, False, True, True]) + a_dp = inp.array(a) + b_dp = inp.array(b) + c_dp = inp.array(c) + expected = numpy.einsum( + "i,i,i->i", a, b, c, dtype="?", casting="unsafe", optimize=do_opt + ) + args = ["i,i,i->i", a_dp, b_dp, c_dp] + result = inp.einsum(*args, dtype="?", casting="unsafe", optimize=do_opt) + assert_dtype_allclose(result, expected) + + args = [a_dp, [0], b_dp, [0], c_dp, [0], [0]] + result = inp.einsum(*args, dtype="?", casting="unsafe", optimize=do_opt) + assert_dtype_allclose(result, expected) + + # with an scalar, NumPy < 2.0.0 uses the other input arrays to + # determine the output type while for NumPy > 2.0.0 the scalar + # with default machine dtype is used to determine the output + # data type + if numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": + check_type = True + else: + check_type = False + a = numpy.arange(9, dtype=dtype) + a_dp = inp.array(a) + expected = numpy.einsum(",i->", 3, a) + assert_dtype_allclose( + inp.einsum(",i->", 3, a_dp), expected, check_type=check_type + ) + assert_dtype_allclose( + inp.einsum(3, [], a_dp, [0], []), expected, check_type=check_type + ) + + expected = numpy.einsum("i,->", a, 3) + assert_dtype_allclose( + inp.einsum("i,->", a_dp, 3), expected, check_type=check_type + ) + assert_dtype_allclose( + inp.einsum(a_dp, [0], 3, [], []), expected, check_type=check_type + ) + + # Various stride0, contiguous, and SSE aligned variants + for n in range(1, 25): + a = numpy.arange(n, dtype=dtype) + a_dp = inp.array(a) + assert_dtype_allclose( + inp.einsum("...,...", a_dp, a_dp, optimize=do_opt), + numpy.einsum("...,...", a, a, optimize=do_opt), + ) + assert_dtype_allclose( + inp.einsum("i,i", a_dp, a_dp, optimize=do_opt), + numpy.einsum("i,i", a, a, optimize=do_opt), + ) + assert_dtype_allclose( + inp.einsum("i,->i", a_dp, 2, optimize=do_opt), + numpy.einsum("i,->i", a, 2, optimize=do_opt), + check_type=check_type, + ) + assert_dtype_allclose( + inp.einsum(",i->i", 2, a_dp, optimize=do_opt), + numpy.einsum(",i->i", 2, a, optimize=do_opt), + check_type=check_type, + ) + assert_dtype_allclose( + inp.einsum("i,->", a_dp, 2, optimize=do_opt), + numpy.einsum("i,->", a, 2, optimize=do_opt), + check_type=check_type, + ) + assert_dtype_allclose( + inp.einsum(",i->", 2, a_dp, optimize=do_opt), + numpy.einsum(",i->", 2, a, optimize=do_opt), + check_type=check_type, + ) + + assert_dtype_allclose( + inp.einsum("...,...", a_dp[1:], a_dp[:-1], optimize=do_opt), + numpy.einsum("...,...", a[1:], a[:-1], optimize=do_opt), + ) + assert_dtype_allclose( + inp.einsum("i,i", a_dp[1:], a_dp[:-1], optimize=do_opt), + numpy.einsum("i,i", a[1:], a[:-1], optimize=do_opt), + ) + assert_dtype_allclose( + inp.einsum("i,->i", a_dp[1:], 2, optimize=do_opt), + numpy.einsum("i,->i", a[1:], 2, optimize=do_opt), + check_type=check_type, + ) + assert_dtype_allclose( + inp.einsum(",i->i", 2, a_dp[1:], optimize=do_opt), + numpy.einsum(",i->i", 2, a[1:], optimize=do_opt), + check_type=check_type, + ) + assert_dtype_allclose( + inp.einsum("i,->", a_dp[1:], 2, optimize=do_opt), + numpy.einsum("i,->", a[1:], 2, optimize=do_opt), + check_type=check_type, + ) + assert_dtype_allclose( + inp.einsum(",i->", 2, a_dp[1:], optimize=do_opt), + numpy.einsum(",i->", 2, a[1:], optimize=do_opt), + check_type=check_type, + ) + + # special case + a = numpy.arange(2) + 1 + b = numpy.arange(4).reshape(2, 2) + 3 + c = numpy.arange(4).reshape(2, 2) + 7 + a_dp = inp.array(a) + b_dp = inp.array(b) + c_dp = inp.array(c) + assert_dtype_allclose( + inp.einsum("z,mz,zm->", a_dp, b_dp, c_dp, optimize=do_opt), + numpy.einsum("z,mz,zm->", a, b, c, optimize=do_opt), + ) + + # singleton dimensions broadcast + a = numpy.ones((10, 2)) + b = numpy.ones((1, 2)) + a_dp = inp.array(a) + b_dp = inp.array(b) + assert_dtype_allclose( + inp.einsum("ij,ij->j", a_dp, b_dp, optimize=do_opt), + numpy.einsum("ij,ij->j", a, b, optimize=do_opt), + ) + + # a blas-compatible contraction broadcasting case + a = numpy.array([2.0, 3.0]) + b = numpy.array([4.0]) + a_dp = inp.array(a) + b_dp = inp.array(b) + assert_dtype_allclose( + inp.einsum("i, i", a_dp, b_dp, optimize=do_opt), + numpy.einsum("i, i", a, b, optimize=do_opt), + ) + + # all-ones array + a = numpy.ones((1, 5)) / 2 + b = numpy.ones((5, 5)) / 2 + a_dp = inp.array(a) + b_dp = inp.array(b) + assert_dtype_allclose( + inp.einsum("...ij,...jk->...ik", a_dp, a_dp, optimize=do_opt), + numpy.einsum("...ij,...jk->...ik", a, a, optimize=do_opt), + ) + assert_dtype_allclose( + inp.einsum("...ij,...jk->...ik", a_dp, b_dp, optimize=do_opt), + numpy.einsum("...ij,...jk->...ik", a, b, optimize=do_opt), + ) + + # special case + a = numpy.eye(2, dtype=dtype) + b = numpy.ones(2, dtype=dtype) + a_dp = inp.array(a) + b_dp = inp.array(b) + assert_dtype_allclose( # contig_contig_outstride0_two + inp.einsum("ji,i->", a_dp, b_dp, optimize=do_opt), + numpy.einsum("ji,i->", a, b, optimize=do_opt), + ) + assert_dtype_allclose( # stride0_contig_outstride0_two + inp.einsum("i,ij->", b_dp, a_dp, optimize=do_opt), + numpy.einsum("i,ij->", b, a, optimize=do_opt), + ) + assert_dtype_allclose( # contig_stride0_outstride0_two + inp.einsum("ij,i->", a_dp, b_dp, optimize=do_opt), + numpy.einsum("ij,i->", a, b, optimize=do_opt), + ) + + def test_einsum_sums_int32(self): + self.check_einsum_sums("i4") + self.check_einsum_sums("i4", True) + + def test_einsum_sums_uint32(self): + self.check_einsum_sums("u4") + self.check_einsum_sums("u4", True) + + def test_einsum_sums_int64(self): + self.check_einsum_sums("i8") + + def test_einsum_sums_uint64(self): + self.check_einsum_sums("u8") + + def test_einsum_sums_float32(self): + self.check_einsum_sums("f4") + + def test_einsum_sums_float64(self): + self.check_einsum_sums("f8") + self.check_einsum_sums("f8", True) + + def test_einsum_sums_cfloat64(self): + self.check_einsum_sums("c8") + self.check_einsum_sums("c8", True) + + def test_einsum_sums_cfloat128(self): + self.check_einsum_sums("c16") + + def test_einsum_misc(self): + for opt in [True, False]: + a = numpy.ones((1, 2)) + b = numpy.ones((2, 2, 1)) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("ij...,j...->i...", a, b, optimize=opt) + result = inp.einsum("ij...,j...->i...", a_dp, b_dp, optimize=opt) + assert_dtype_allclose(result, expected) + + a = numpy.array([1, 2, 3]) + b = numpy.array([2, 3, 4]) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("...i,...i", a, b, optimize=True) + result = inp.einsum("...i,...i", a_dp, b_dp, optimize=True) + assert_dtype_allclose(result, expected) + + a = numpy.ones((5, 12, 4, 2, 3), numpy.int64) + b = numpy.ones((5, 12, 11), numpy.int64) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("ijklm,ijn,ijn->", a, b, b, optimize=opt) + result1 = inp.einsum( + "ijklm,ijn,ijn->", a_dp, b_dp, b_dp, optimize=opt + ) + assert_dtype_allclose(result1, expected) + result2 = inp.einsum("ijklm,ijn->", a_dp, b_dp, optimize=opt) + assert_dtype_allclose(result2, expected) + + a = numpy.arange(1, 3) + b = numpy.arange(1, 5).reshape(2, 2) + c = numpy.arange(1, 9).reshape(4, 2) + a_dp = inp.array(a) + b_dp = inp.array(b) + c_dp = inp.array(c) + expected = numpy.einsum("x,yx,zx->xzy", a, b, c, optimize=opt) + result = inp.einsum("x,yx,zx->xzy", a_dp, b_dp, c_dp, optimize=opt) + assert_dtype_allclose(result, expected) + + # Ensure explicitly setting out=None does not cause an error + a = numpy.array([1]) + b = numpy.array([2]) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("i,j", a, b, out=None) + result = inp.einsum("i,j", a_dp, b_dp, out=None) + assert_dtype_allclose(result, expected) + + def test_subscript_range(self): + # make sure that all letters of Latin alphabet (both uppercase & lowercase) can be used + # when creating a subscript from arrays + a = inp.ones((2, 3)) + b = inp.ones((3, 4)) + inp.einsum(a, [0, 20], b, [20, 2], [0, 2]) + inp.einsum(a, [0, 27], b, [27, 2], [0, 2]) + inp.einsum(a, [0, 51], b, [51, 2], [0, 2]) + assert_raises(ValueError, inp.einsum, a, [0, 52], b, [52, 2], [0, 2]) + assert_raises(ValueError, inp.einsum, a, [-1, 5], b, [5, 2], [-1, 2]) + + def test_einsum_broadcast(self): + a = numpy.arange(2 * 3 * 4).reshape(2, 3, 4) + b = numpy.arange(3) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("ijk,j->ijk", a, b, optimize=False) + result = inp.einsum("ijk,j->ijk", a_dp, b_dp, optimize=False) + assert_dtype_allclose(result, expected) + for opt in [True, False]: + assert_dtype_allclose( + inp.einsum("ij...,j...->ij...", a_dp, b_dp, optimize=opt), + expected, + ) + assert_dtype_allclose( + inp.einsum("ij...,...j->ij...", a_dp, b_dp, optimize=opt), + expected, + ) + assert_dtype_allclose( + inp.einsum("ij...,j->ij...", a_dp, b_dp, optimize=opt), expected + ) + + a = numpy.arange(12).reshape((4, 3)) + b = numpy.arange(6).reshape((3, 2)) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("ik,kj->ij", a, b, optimize=False) + result = inp.einsum("ik,kj->ij", a_dp, b_dp, optimize=False) + assert_dtype_allclose(result, expected) + for opt in [True, False]: + assert_dtype_allclose( + inp.einsum("ik...,k...->i...", a_dp, b_dp, optimize=opt), + expected, + ) + assert_dtype_allclose( + inp.einsum("ik...,...kj->i...j", a_dp, b_dp, optimize=opt), + expected, + ) + assert_dtype_allclose( + inp.einsum("...k,kj", a_dp, b_dp, optimize=opt), expected + ) + assert_dtype_allclose( + inp.einsum("ik,k...->i...", a_dp, b_dp, optimize=opt), expected + ) + + dims = [2, 3, 4, 5] + a = numpy.arange(numpy.prod(dims)).reshape(dims) + v = numpy.arange(4) + a_dp = inp.array(a) + v_dp = inp.array(v) + expected = numpy.einsum("ijkl,k->ijl", a, v, optimize=False) + result = inp.einsum("ijkl,k->ijl", a_dp, v_dp, optimize=False) + assert_dtype_allclose(result, expected) + for opt in [True, False]: + assert_dtype_allclose( + inp.einsum("ijkl,k", a_dp, v_dp, optimize=opt), expected + ) + assert_dtype_allclose( + inp.einsum("...kl,k", a_dp, v_dp, optimize=opt), expected + ) + assert_dtype_allclose( + inp.einsum("...kl,k...", a_dp, v_dp, optimize=opt), expected + ) + + J, K, M = 8, 8, 6 + a = numpy.arange(J * K * M).reshape(1, 1, 1, J, K, M) + b = numpy.arange(J * K * M * 3).reshape(J, K, M, 3) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("...lmn,...lmno->...o", a, b, optimize=False) + result = inp.einsum("...lmn,...lmno->...o", a_dp, b_dp, optimize=False) + assert_dtype_allclose(result, expected) + for opt in [True, False]: + assert_dtype_allclose( + inp.einsum("...lmn,lmno->...o", a_dp, b_dp, optimize=opt), + expected, + ) + + def test_einsum_stride(self): + a = numpy.arange(2 * 3).reshape(2, 3).astype(numpy.float32) + b = numpy.arange(2 * 3 * 2731).reshape(2, 3, 2731).astype(numpy.int16) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("cl, cpx->lpx", a, b) + result = inp.einsum("cl, cpx->lpx", a_dp, b_dp) + assert_dtype_allclose(result, expected) + + a = numpy.arange(3 * 3).reshape(3, 3).astype(numpy.float64) + b = numpy.arange(3 * 3 * 64 * 64) + b = b.reshape(3, 3, 64, 64).astype(numpy.float32) + a_dp = inp.array(a) + b_dp = inp.array(b) + expected = numpy.einsum("cl, cpxy->lpxy", a, b) + result = inp.einsum("cl, cpxy->lpxy", a_dp, b_dp) + assert_dtype_allclose(result, expected) + + def test_einsum_collapsing(self): + x = numpy.random.normal(0, 1, (5, 5, 5, 5)) + y = numpy.zeros((5, 5)) + expected = numpy.einsum("aabb->ab", x, out=y) + x_dp = inp.array(x) + y_dp = inp.array(y) + result = inp.einsum("aabb->ab", x_dp, out=y_dp) + assert result is y_dp + assert_dtype_allclose(result, expected) + + def test_einsum_tensor(self): + tensor = numpy.random.random_sample((10, 10, 10, 10)) + tensor_dp = inp.array(tensor) + expected = numpy.einsum("ijij->", tensor) + result = inp.einsum("ijij->", tensor_dp) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True) + ) + def test_different_paths(self, dtype): + # Simple test, designed to exercise most specialized code paths, + # note the +0.5 for floats. This makes sure we use a float value + # where the results must be exact. + a = (numpy.arange(7) + 0.5).astype(dtype) + s = numpy.array(2, dtype=dtype) + + a_dp = inp.asarray(a) + s_dp = inp.asarray(s) + + # contig -> scalar: + expected = numpy.einsum("i->", a) + result = inp.einsum("i->", a_dp) + assert_dtype_allclose(result, expected) + + # contig, contig -> contig: + expected = numpy.einsum("i,i->i", a, a) + result = inp.einsum("i,i->i", a_dp, a_dp) + assert_dtype_allclose(result, expected) + + # noncontig, noncontig -> contig: + expected = numpy.einsum("i,i->i", a.repeat(2)[::2], a.repeat(2)[::2]) + result = inp.einsum("i,i->i", a_dp.repeat(2)[::2], a_dp.repeat(2)[::2]) + assert_dtype_allclose(result, expected) + + # contig + contig -> scalar + expected = numpy.einsum("i,i->", a, a) + result = inp.einsum("i,i->", a_dp, a_dp) + assert_dtype_allclose(result, expected) + + # contig + scalar -> contig (with out) + out_dp = inp.ones(7, dtype=dtype) + expected = numpy.einsum("i,->i", a, s) + result = inp.einsum("i,->i", a_dp, s_dp, out=out_dp) + assert result is out_dp + assert_dtype_allclose(result, expected) + + # scalar + contig -> contig (with out) + expected = numpy.einsum(",i->i", s, a) + result = inp.einsum(",i->i", s_dp, a_dp) + assert_dtype_allclose(result, expected) + + # scalar + contig -> scalar + # Use einsum to compare to not have difference due to sum round-offs: + result1 = inp.einsum(",i->", s_dp, a_dp) + result2 = inp.einsum("i->", s_dp * a_dp) + assert_array_equal(result1.asnumpy(), result2.asnumpy()) + + # contig + scalar -> scalar + # Use einsum to compare to not have difference due to sum round-offs: + result3 = inp.einsum("i,->", a_dp, s_dp) + assert_array_equal(result2.asnumpy(), result3.asnumpy()) + + # contig + contig + contig -> scalar + a = numpy.array([0.5, 0.5, 0.25, 4.5, 3.0], dtype=dtype) + a_dp = inp.array(a) + expected = numpy.einsum("i,i,i->", a, a, a) + result = inp.einsum("i,i,i->", a_dp, a_dp, a_dp) + assert_dtype_allclose(result, expected) + + # four arrays: + expected = numpy.einsum("i,i,i,i->", a, a, a, a) + result = inp.einsum("i,i,i,i->", a_dp, a_dp, a_dp, a_dp) + assert_dtype_allclose(result, expected) + + def test_small_boolean_arrays(self): + # Use array of True embedded in False. + a = numpy.zeros((16, 1, 1), dtype=inp.bool)[:2] + a[...] = True + a_dp = inp.array(a) + out_dp = inp.zeros((16, 1, 1), dtype=inp.bool)[:2] + expected = numpy.einsum("...ij,...jk->...ik", a, a) + result = inp.einsum("...ij,...jk->...ik", a_dp, a_dp, out=out_dp) + assert result is out_dp + assert_dtype_allclose(result, expected) + + def test_out_is_res(self): + a = numpy.arange(9).reshape(3, 3) + a_dp = inp.array(a) + expected = numpy.einsum("...ij,...jk->...ik", a, a) + result = inp.einsum("...ij,...jk->...ik", a_dp, a_dp, out=a_dp) + assert result is a_dp + assert_dtype_allclose(result, expected) + + def optimize_compare(self, subscripts, operands=None): + # Setup for optimize einsum + chars = "abcdefghij" + sizes = numpy.array([2, 3, 4, 5, 4, 3, 2, 6, 5, 4, 3]) + global_size_dict = dict(zip(chars, sizes)) + + # Tests all paths of the optimization function against + # conventional einsum + if operands is None: + args = [subscripts] + terms = subscripts.split("->")[0].split(",") + for term in terms: + dims = [global_size_dict[x] for x in term] + args.append(numpy.random.rand(*dims)) + else: + args = [subscripts] + operands + + dpnp_args = [args[0]] + for arr in args[1:]: + dpnp_args.append(inp.asarray(arr)) + + expected = numpy.einsum(*args) + # no optimization + result = inp.einsum(*dpnp_args, optimize=False) + assert_dtype_allclose(result, expected, factor=16) + + result = inp.einsum(*dpnp_args, optimize="greedy") + assert_dtype_allclose(result, expected, factor=16) + + result = inp.einsum(*dpnp_args, optimize="optimal") + assert_dtype_allclose(result, expected, factor=16) + + def test_hadamard_like_products(self): + # Hadamard outer products + self.optimize_compare("a,ab,abc->abc") + self.optimize_compare("a,b,ab->ab") + + def test_index_transformations(self): + # Simple index transformation cases + self.optimize_compare("ea,fb,gc,hd,abcd->efgh") + self.optimize_compare("ea,fb,abcd,gc,hd->efgh") + self.optimize_compare("abcd,ea,fb,gc,hd->efgh") + + def test_complex(self): + # Long test cases + self.optimize_compare("acdf,jbje,gihb,hfac,gfac,gifabc,hfac") + self.optimize_compare("acdf,jbje,gihb,hfac,gfac,gifabc,hfac") + self.optimize_compare("cd,bdhe,aidb,hgca,gc,hgibcd,hgac") + self.optimize_compare("abhe,hidj,jgba,hiab,gab") + self.optimize_compare("bde,cdh,agdb,hica,ibd,hgicd,hiac") + self.optimize_compare("chd,bde,agbc,hiad,hgc,hgi,hiad") + self.optimize_compare("chd,bde,agbc,hiad,bdi,cgh,agdb") + self.optimize_compare("bdhe,acad,hiab,agac,hibd") + + def test_collapse(self): + # Inner products + self.optimize_compare("ab,ab,c->") + self.optimize_compare("ab,ab,c->c") + self.optimize_compare("ab,ab,cd,cd->") + self.optimize_compare("ab,ab,cd,cd->ac") + self.optimize_compare("ab,ab,cd,cd->cd") + self.optimize_compare("ab,ab,cd,cd,ef,ef->") + + def test_expand(self): + # Outer products + self.optimize_compare("ab,cd,ef->abcdef") + self.optimize_compare("ab,cd,ef->acdf") + self.optimize_compare("ab,cd,de->abcde") + self.optimize_compare("ab,cd,de->be") + self.optimize_compare("ab,bcd,cd->abcd") + self.optimize_compare("ab,bcd,cd->abd") + + def test_edge_cases(self): + # Difficult edge cases for optimization + self.optimize_compare("eb,cb,fb->cef") + self.optimize_compare("dd,fb,be,cdb->cef") + self.optimize_compare("bca,cdb,dbf,afc->") + self.optimize_compare("dcc,fce,ea,dbf->ab") + self.optimize_compare("fdf,cdd,ccd,afe->ae") + self.optimize_compare("abcd,ad") + self.optimize_compare("ed,fcd,ff,bcf->be") + self.optimize_compare("baa,dcf,af,cde->be") + self.optimize_compare("bd,db,eac->ace") + self.optimize_compare("fff,fae,bef,def->abd") + self.optimize_compare("efc,dbc,acf,fd->abe") + self.optimize_compare("ja,ac,da->jcd") + + def test_inner_product(self): + # Inner products + self.optimize_compare("ab,ab") + self.optimize_compare("ac,ca") + self.optimize_compare("abc,abc") + self.optimize_compare("abc,bac") + self.optimize_compare("abc,cba") + + def test_random_cases(self): + # Randomly built test cases + self.optimize_compare("aab,fa,df,ecc->bde") + self.optimize_compare("ecb,fef,bad,ed->ac") + self.optimize_compare("bcf,bbb,fbf,fc->") + self.optimize_compare("bb,ff,be->e") + self.optimize_compare("bcb,bb,fc,fff->") + self.optimize_compare("fbb,dfd,fc,fc->") + self.optimize_compare("afd,ea,cc,dc->ef") + self.optimize_compare("adb,bc,fa,cfc->d") + self.optimize_compare("bbd,bda,fc,db->acf") + self.optimize_compare("dba,ead,cad->bce") + self.optimize_compare("aef,fbc,dca->bde") + + def test_combined_views_mapping(self): + a = inp.arange(9).reshape(1, 1, 3, 1, 3) + expected = numpy.einsum("bbcdc->d", a.asnumpy()) + result = inp.einsum("bbcdc->d", a) + assert_dtype_allclose(result, expected) + + def test_broadcasting_dot_cases(self): + a = numpy.random.rand(1, 5, 4) + b = numpy.random.rand(4, 6) + c = numpy.random.rand(5, 6) + d = numpy.random.rand(10) + + self.optimize_compare("ijk,kl,jl", operands=[a, b, c]) + self.optimize_compare("ijk,kl,jl,i->i", operands=[a, b, c, d]) + + e = numpy.random.rand(1, 1, 5, 4) + f = numpy.random.rand(7, 7) + self.optimize_compare("abjk,kl,jl", operands=[e, b, c]) + self.optimize_compare("abjk,kl,jl,ab->ab", operands=[e, b, c, f]) + + g = numpy.arange(64).reshape(2, 4, 8) + self.optimize_compare("obk,ijk->ioj", operands=[g, g]) + + def test_output_order(self): + # Ensure output order is respected for optimize cases, the below + # conraction should yield a reshaped tensor view + a = inp.ones((2, 3, 5), order="F") + b = inp.ones((4, 3), order="F") + + for opt in [True, False]: + tmp = inp.einsum("...ft,mf->...mt", a, b, order="a", optimize=opt) + assert tmp.flags.f_contiguous + + tmp = inp.einsum("...ft,mf->...mt", a, b, order="f", optimize=opt) + assert tmp.flags.f_contiguous + + tmp = inp.einsum("...ft,mf->...mt", a, b, order="c", optimize=opt) + assert tmp.flags.c_contiguous + + tmp = inp.einsum("...ft,mf->...mt", a, b, order="k", optimize=opt) + assert tmp.flags.c_contiguous is False + assert tmp.flags.f_contiguous is False + + tmp = inp.einsum("...ft,mf->...mt", a, b, optimize=opt) + assert tmp.flags.c_contiguous is False + assert tmp.flags.f_contiguous is False + + c = inp.ones((4, 3), order="C") + for opt in [True, False]: + tmp = inp.einsum("...ft,mf->...mt", a, c, order="a", optimize=opt) + assert tmp.flags.c_contiguous + + d = inp.ones((2, 3, 5), order="C") + for opt in [True, False]: + tmp = inp.einsum("...ft,mf->...mt", d, c, order="a", optimize=opt) + assert tmp.flags.c_contiguous class TestInv: diff --git a/tests/third_party/cupy/linalg_tests/test_einsum.py b/tests/third_party/cupy/linalg_tests/test_einsum.py index b382ec28f17..fa2e28b6aa7 100644 --- a/tests/third_party/cupy/linalg_tests/test_einsum.py +++ b/tests/third_party/cupy/linalg_tests/test_einsum.py @@ -475,15 +475,23 @@ def test_einsum_binary(self, xp, dtype_a, dtype_b): class TestEinSumBinaryOperationWithScalar: + # with an scalar, NumPy < 2.0.0 uses the other input arrays to determine + # the output type while for NumPy > 2.0.0 the scalar with default machine + # dtype is used to determine the output type + if numpy.lib.NumpyVersion(numpy.__version__) < "2.0.0": + type_check = has_support_aspect64() + else: + type_check = False + @testing.for_all_dtypes() - @testing.numpy_cupy_allclose(contiguous_check=False) + @testing.numpy_cupy_allclose(contiguous_check=False, type_check=type_check) def test_scalar_1(self, xp, dtype): shape_a = (2,) a = testing.shaped_arange(shape_a, xp, dtype) return xp.asarray(xp.einsum(",i->", 3, a)) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose(contiguous_check=False) + @testing.numpy_cupy_allclose(contiguous_check=False, type_check=type_check) def test_scalar_2(self, xp, dtype): shape_a = (2,) a = testing.shaped_arange(shape_a, xp, dtype)