diff --git a/.codecov.yml b/.codecov.yml index edbdb12..7287653 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -1,6 +1,4 @@ +# Ignore files ignore: - - "tinynumpy/tests/**/*.py" # Ignore files in the tests directory - - "tinynumpy/benchmark.py" - -range: 70..90 # First number represents red, and second represents green -round: down # up, down, or nearest \ No newline at end of file + - "tinynumpy/tests/*" + - "tinynumpy/benchmark.py" \ No newline at end of file diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 420e6a6..9f59057 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -51,7 +51,7 @@ jobs: - name: Run tests with Python ${{ matrix.python-version }} run: | - python${{ matrix.python-version }} -m pytest --cov=tinynumpy --cov-report=xml tinynumpy/tests + python -m pytest --cov=tinynumpy --cov-report=xml tinynumpy/tests - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v4.4.0 diff --git a/README.md b/README.md index 81360a5..7b37933 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,6 @@ Caveats ------- * ndarray.flat iterator cannot be indexed (it is a generator). -* No support for Fortran order. * Support for data types limited to bool, uin8, uint16, uint32, uint64, int8, int16, int32, int64, float32, float64. * Functions that calculate statistics on the data are much slower, since diff --git a/tinynumpy/tests/test_tinynumpy.py b/tinynumpy/tests/test_tinynumpy.py index c981832..13bfa76 100644 --- a/tinynumpy/tests/test_tinynumpy.py +++ b/tinynumpy/tests/test_tinynumpy.py @@ -9,6 +9,7 @@ import pytest from pytest import raises, skip +import faulthandler try: import tinynumpy.tinynumpy as tnp @@ -51,27 +52,117 @@ def test_shapes_and_strides(): else: assert len(repr(b)) > (b.size * 3) # "x.0" for each element +def test_strides_for_shape(): + + shapes_itemsize = [ + ((3,), 4, 'C', (4,)), + ((3,), 4, 'F', (4,)), + ((3, 4), 4, 'C', (16, 4)), + ((3, 4), 4, 'F', (4, 12)), + ((3, 4, 2), 4, 'C', (32, 8, 4)), + ((3, 4, 2), 4, 'F', (4, 12, 48)), + ((5, 4, 3), 8, 'C', (96, 24, 8)), + ((5, 4, 3), 8, 'F', (8, 40, 160)), + ] + + for shape, itemsize, order, expected_strides in shapes_itemsize: + + actual_strides = tnp._strides_for_shape(shape, itemsize, order) + + dtype = f'int{itemsize * 8}' + a = np.empty(shape, dtype=dtype, order=order) + numpy_strides = a.strides + + # check against numpy + assert actual_strides == numpy_strides, f"For shape {shape}, order {order}: Expected {actual_strides}, got {numpy_strides}" + +def test_c_order(): + a = tnp.array([1, 2, 3], order='C') + assert a.flags['C_CONTIGUOUS'] == True + assert a.flags['F_CONTIGUOUS'] == True + + b = tnp.array([[1, 2, 3], [4, 5, 6]], order='C') + assert b.flags['C_CONTIGUOUS'] == True + assert b.flags['F_CONTIGUOUS'] == False + +def test_f_order(): + a = np.array([1, 2, 3], order='F') + assert a.flags['C_CONTIGUOUS'] == True + assert a.flags['F_CONTIGUOUS'] == True + + b = tnp.array([[1, 2, 3], [4, 5, 6]], order='F') + assert b.flags['C_CONTIGUOUS'] == False + assert b.flags['F_CONTIGUOUS'] == True + +def test_unspecified_order(): + a = tnp.array([1, 2, 3]) + assert a.flags['C_CONTIGUOUS'] == True + assert a.flags['F_CONTIGUOUS'] == True + + b = tnp.array([[1, 2, 3], [4, 5, 6]]) + assert b.flags['C_CONTIGUOUS'] == True + assert b.flags['F_CONTIGUOUS'] == False + +def test_empty_array(): + a = tnp.array([], order='C') + assert a.flags['C_CONTIGUOUS'] == True + assert a.flags['F_CONTIGUOUS'] == True + + b = tnp.array([], order='F') + assert b.flags['C_CONTIGUOUS'] == True + assert b.flags['F_CONTIGUOUS'] == True + +def test_multiple_dimensions(): + a = tnp.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], order='C') + assert a.flags['C_CONTIGUOUS'] == True + assert a.flags['F_CONTIGUOUS'] == False + + skip() + b = tnp.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], order='F') + assert b.flags['C_CONTIGUOUS'] == False + assert b.flags['F_CONTIGUOUS'] == True + + +def test_ndarray_int_conversion(): + # Test case 1: Array with size 1 + a = tnp.array([42]) + assert int(a) == 42 + + # Test case 2: Array with size > 1 + b = tnp.array([1, 2, 3]) + try: + int(b) + except TypeError as e: + assert str(e) == 'Only length-1 arrays can be converted to scalar' + else: + assert False, "Expected TypeError not raised" + + # edge scenarios + c = tnp.array([], dtype='int32') + try: + int(c) + except TypeError as e: + assert str(e) == 'Only length-1 arrays can be converted to scalar' + else: + assert False, "Expected TypeError not raised" + def test_repr(): - for dtype in ['float32', 'float64', 'int32', 'int64']: - for data in [[1, 2, 3, 4, 5, 6, 7, 8], - [[1, 2], [3, 4], [5, 6], [7, 8]], - [[[1, 2], [3, 4]],[[5, 6], [7, 8]]], - ]: + for data in [ + [1, 2, 3, 4, 5, 6, 7, 8], + [[1, 2], [3, 4], [5, 6], [7, 8]], + [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], + ]: a = np.array(data, dtype) b = tnp.array(data, dtype) - # Compare line by line (forget leading whitespace) - charscompared = 0 + for l1, l2 in zip(repr(a).splitlines(), repr(b).splitlines()): l1, l2 = l1.rstrip(), l2.rstrip() l1, l2 = l1.split('dtype=')[0], l2.split('dtype=')[0] - l1 = l1.replace(' ', '').replace('\t', '').rstrip(',)') + l1 = l1.replace(' ', '').replace('\t', '').rstrip(',)').replace('.', '') l2 = l2.replace(' ', '').replace('\t', '').rstrip(',)') assert l1 == l2 - charscompared += len(l1) - assert charscompared > (3 * b.size) - 2 - def test__float__(): @@ -393,7 +484,6 @@ def test_dtype(): def test_reshape(): - a = np.array([1, 2, 3, 4, 5, 6, 7, 8], dtype='int32') b = tnp.array([1, 2, 3, 4, 5, 6, 7, 8], dtype='int32') @@ -424,6 +514,11 @@ def test_reshape(): assert b2.base is b assert a2[:].base is a assert b2[:].base is b + + # Test reshape + reshaped_a = a.reshape((4, 2)) + reshaped_b = b.reshape((4, 2)) + assert(reshaped_a == reshaped_b).all() # Fail with raises(ValueError): # Invalid shape @@ -542,6 +637,33 @@ def test_getitem(): a = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) b = tnp.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + +def test_get_step(): + # Test for C-contiguous arrays + c_array = np.array([[1, 2, 3], [4, 5, 6]], order='C') + tnp_c_array = tnp.array([[1, 2, 3], [4, 5, 6]], order='C') + assert tnp._get_step(c_array, order='C') == 1 + assert tnp._get_step(tnp_c_array, order='C') == 1 + + # Test for F-contiguous arrays + f_array = np.array([[1, 2, 3], [4, 5, 6]], order='F') + tnp_f_array = tnp.array([[1, 2, 3], [4, 5, 6]], order='F') + assert tnp._get_step(f_array, order='F') == 0 + assert tnp._get_step(tnp_f_array, order='F') == 0 + + # Test for non-contiguous arrays + nc_array = c_array[:, ::2] + tnp_nc_array = tnp_c_array[:, ::2] + assert tnp._get_step(nc_array) == 0 + assert tnp._get_step(tnp_nc_array) == 0 + + # Test for non-contiguous arrays with Fortran order + f_nc_array = f_array[::2, :] + tnp_f_nc_array = tnp_f_array[::2, :] + assert tnp._get_step(f_nc_array, order='F') == 0 + assert tnp._get_step(tnp_f_nc_array, order='F') == 0 + + def test_setitem_writeable(): a = tnp.array([1, 2, 3]) @@ -557,6 +679,41 @@ def test_setitem_writeable(): with pytest.raises(ValueError): a = tnp.array([1, 2, 3]) a.flags = {'WRITEBACKIFCOPY': True} + + +def test_asfortranarray(): + """test the asfortranarray function for tinynumpy""" + + a = tnp.array([[1, 2, 3], [4, 5, 6]]) + if a.ndim >= 1: + b = tnp.asfortranarray(a) + result_F = b.flags['F_CONTIGUOUS'] + result_C = b.flags['C_CONTIGUOUS'] + assert result_F == True + assert result_C == False + + assert b.flags['OWNDATA'] == False + assert b.flags['WRITEABLE'] == True + assert b.flags['ALIGNED'] == True + assert b.flags['WRITEBACKIFCOPY'] == False + + expected_data = tnp.array([[1, 2, 3], [4, 5, 6]]) + for i in range(b.shape[0]): + for j in range(b.shape[1]): + assert b[i, j] == expected_data[i][j] + + b = tnp.array([1, 2, 3]) + if b.ndim <= 1: + c = tnp.asfortranarray(b) + result_F = c.flags['F_CONTIGUOUS'] + result_C = b.flags['C_CONTIGUOUS'] + assert result_F == True + assert result_C == True + + assert b.flags['OWNDATA'] == True + assert b.flags['WRITEABLE'] == True + assert b.flags['ALIGNED'] == True + assert b.flags['WRITEBACKIFCOPY'] == False def test_transpose(): @@ -753,6 +910,7 @@ def test_divide(): assert a == tnp.array([5, -4, 1], dtype='int64') + def test_multiply(): """test the addition function for tinynumpy""" @@ -911,7 +1069,7 @@ def test_linspace(): # data types result = tnp.linspace(0, 1, dtype='float64') assert result.dtype == 'float64' - + def test_astype(): """test the astype function for tinynumpy""" for dtype in ['bool', 'int8', 'uint8', 'int16', 'uint16', @@ -951,4 +1109,4 @@ def test_astype(): assert result == expected_result_float32 expected_result_float64 = tnp.array([ 1., 2., 3.], dtype='float64') - assert result == expected_result_float64 + assert result == expected_result_float64 \ No newline at end of file diff --git a/tinynumpy/tinynumpy.py b/tinynumpy/tinynumpy.py index 965e7c4..32bfdae 100644 --- a/tinynumpy/tinynumpy.py +++ b/tinynumpy/tinynumpy.py @@ -36,7 +36,6 @@ # todo: mathematical operators # todo: more methods? # todo: logspace, meshgrid -# todo: Fortran order? from __future__ import division from __future__ import absolute_import @@ -94,14 +93,17 @@ def _ceildiv(a, b): return -(-a // b) -def _get_step(view): +def _get_step(view, order='C'): """ Return step to walk over array. If 1, the array is fully C-contiguous. If 0, the striding is such that one cannot step through the array. """ cont_strides = _strides_for_shape(view.shape, view.itemsize) - step = view.strides[-1] // cont_strides[-1] + if order == 'C': + step = view.strides[-1] // cont_strides[-1] + elif order == 'F': + step = view.strides[0] // cont_strides[0] corrected_strides = tuple([i * step for i in cont_strides]) almost_cont = view.strides == corrected_strides @@ -110,14 +112,18 @@ def _get_step(view): else: return 0 # not contiguous +def _strides_for_shape(shape, itemsize, order='C'): + strides = [0] * len(shape) + if order == 'C': + strides[-1] = itemsize + for i in range(len(shape) - 2, -1, -1): + strides[i] = strides[i + 1] * shape[i + 1] + elif order == 'F': + strides[0] = itemsize + for i in range(1, len(shape)): + strides[i] = strides[i - 1] * shape[i - 1] + return tuple(strides) -def _strides_for_shape(shape, itemsize): - strides = [] - stride_product = 1 - for s in reversed(shape): - strides.append(stride_product) - stride_product *= s - return tuple([i * itemsize for i in reversed(strides)]) def _size_for_shape(shape): @@ -146,14 +152,19 @@ def _shape_from_object_r(element, axis): return tuple(shape) -def _assign_from_object(array, obj): - def _assign_from_object_r(element, indicies): +def _assign_from_object(array, obj, order): + def _assign_from_object_r(element, indices): if isinstance(element, list): for i, e in enumerate(element): - _assign_from_object_r(e, indicies + [i]) + new_indices = indices + [i] + _assign_from_object_r(e, new_indices) else: - array[tuple(indicies)] = element + if order == 'F': + indices = indices[::1] + array[tuple(indices)] = element + _assign_from_object_r(obj, []) + return array def _increment_mutable_key(key, shape): @@ -232,9 +243,11 @@ def array(obj, dtype=None, copy=True, order=None): el = el[0] if isinstance(el, int): dtype = 'int64' + if order is None: + order = 'C' # Create array - a = ndarray(shape, dtype, order=None) - _assign_from_object(a, obj) + a = ndarray(shape, dtype, order=order) + _assign_from_object(a, obj, order) return a @@ -242,6 +255,7 @@ def zeros_like(a, dtype=None, order=None): """ Return an array of zeros with the same shape and type as a given array. """ dtype = a.dtype if dtype is None else dtype + order = 'C' if order is None else order return zeros(a.shape, dtype, order) @@ -249,6 +263,7 @@ def ones_like(a, dtype=None, order=None): """ Return an array of ones with the same shape and type as a given array. """ dtype = a.dtype if dtype is None else dtype + order = 'C' if order is None else order return ones(a.shape, dtype, order) @@ -256,18 +271,21 @@ def empty_like(a, dtype=None, order=None): """ Return a new array with the same shape and type as a given array. """ dtype = a.dtype if dtype is None else dtype + order = 'C' if order is None else order return empty(a.shape, dtype, order) def zeros(shape, dtype=None, order=None): """Return a new array of given shape and type, filled with zeros """ + order = 'C' if order is None else order return empty(shape, dtype, order) def ones(shape, dtype=None, order=None): """Return a new array of given shape and type, filled with ones """ + order = 'C' if order is None else order a = empty(shape, dtype, order) a.fill(1) return a @@ -286,6 +304,7 @@ def eye(size): def empty(shape, dtype=None, order=None): """Return a new array of given shape and type, without initializing entries """ + order = 'C' if order is None else order return ndarray(shape, dtype, order=order) @@ -431,6 +450,26 @@ def reshape(X,shape): return X.reshape(shape) +def asfortranarray(self): + """ + Convert the array to F-contiguous order. + + Returns: + ndarray: A new array in F-contiguous order. + + """ + + # calculate new strides + strides = _strides_for_shape(self.shape, self._itemsize) + + # create new object with the same data from buffer + out = ndarray(self._shape, dtype=self._dtype, buffer=self._data, + offset=self._offset, strides=strides) + out._asfortranarray = True + + return out + + class ndarray(object): """ ndarray(shape, dtype='float64', buffer=None, offset=0, strides=None, order=None) @@ -462,7 +501,7 @@ class ndarray(object): Offset of array data in buffer. strides : tuple of ints, optional Strides of data in memory. - order : {'C', 'F'}, optional NOT SUPPORTED + order : {'C', 'F'}, optional Row-major or column-major order. Attributes @@ -526,14 +565,11 @@ class ndarray(object): """ __slots__ = ['_dtype', '_shape', '_strides', '_itemsize', - '_offset', '_base', '_data', '_flags_bool'] + '_offset', '_base', '_data', '_flags_bool', '_asfortranarray'] def __init__(self, shape, dtype='float64', buffer=None, offset=0, - strides=None, order=None): + strides=None, order='C'): - # Check order - if order is not None: - raise RuntimeError('ndarray order parameter is not supported') # Check and set shape try : assert isinstance(shape, Iterable) @@ -542,6 +578,7 @@ def __init__(self, shape, dtype='float64', buffer=None, offset=0, raise AssertionError('The shape must be tuple or list') assert all([isinstance(x, int) for x in shape]) self._shape = shape + # Check and set dtype dtype = _convert_dtype(dtype) if (dtype is not None) else 'float64' if dtype not in _known_dtypes: @@ -556,19 +593,28 @@ def __init__(self, shape, dtype='float64', buffer=None, offset=0, # Check and set offset and strides assert offset == 0 self._offset = 0 - assert strides is None - self._strides = _strides_for_shape(self._shape, self.itemsize) # Set flag to true by default self._flags_bool = True - + # Check order + if order == 'C': + strides = _strides_for_shape(shape, self._itemsize, order='C') + elif order == 'F': + strides = _strides_for_shape(shape, self._itemsize, order='F') + self._strides = strides + self.flags = { + 'C_CONTIGUOUS': (order == 'C' or self.ndim <= 1), + 'F_CONTIGUOUS': (order == 'F' or self.ndim <= 1) + } else: # Existing array if isinstance(buffer, ndarray) and buffer.base is not None: buffer = buffer.base - # Keep a reference to avoid memory cleanup + # Keep a reference to e memory cleanup self._base = buffer # WRITEABLE should be True when creating a view self._flags_bool = True + # Check to keep track of asfortranarray() and @property flag + self._asfortranarray = False # for ndarray we use the data property if isinstance(buffer, ndarray): buffer = buffer.data @@ -582,11 +628,18 @@ def __init__(self, shape, dtype='float64', buffer=None, offset=0, assert all([isinstance(x, int) for x in strides]) assert len(strides) == len(shape) self._strides = strides - - # Define our buffer class - buffersize = self._strides[0] * self._shape[0] // self._itemsize - buffersize += self._offset - BufferClass = _convert_dtype(dtype, 'ctypes') * buffersize + + # If order is F we need to loop + if order == 'F': + total_elements = 1 + for dim in shape: + total_elements += dim + buffersize = total_elements + BufferClass = _convert_dtype(dtype, 'ctypes') * buffersize + else: + buffersize = self._strides[0] * self._shape[0] // self._itemsize + buffersize += self._offset + BufferClass = _convert_dtype(dtype, 'ctypes') * buffersize # Create buffer if buffer is None: self._data = BufferClass() @@ -708,38 +761,40 @@ def __int__(self): else: raise TypeError('Only length-1 arrays can be converted to scalar') + def _repr_r(self, s, axis, offset): + axisindent = min(2, max(0, (self.ndim - axis - 1))) + if axis < len(self._shape): + s += '[' + for k_index in range(self._shape[axis]): + if k_index > 0: + s += ('\n ' + ' ' * axis) * axisindent + if axis == self.ndim - 1: # Last axis + offset_ = offset + k_index * self._strides[axis] // self._itemsize + elem_repr = repr(self._data[offset_]) + if self._dtype.startswith('float'): + if elem_repr.endswith('.0'): + elem_repr = elem_repr[:-2] # Remove trailing '.0' + s += elem_repr + else: + offset_ = offset + k_index * self._strides[axis] // self._itemsize + s = self._repr_r(s, axis + 1, offset_) + if k_index < self._shape[axis] - 1: + s += ', ' + s += ']' + return s + def __repr__(self): # If more than 100 elements, show short repr if self.size > 100: - shapestr = 'x'.join([str(i) for i in self.shape]) - return '' % (shapestr, self.dtype, id(self)) + shapestr = 'x'.join(str(i) for i in self._shape) + return f'' + # Otherwise, try to show in nice way - def _repr_r(s, axis, offset): - axisindent = min(2, max(0, (self.ndim - axis - 1))) - if axis < len(self.shape): - s += '[' - for k_index, k in enumerate(range(self.shape[axis])): - if k_index > 0: - s += ('\n ' + ' ' * axis) * axisindent - offset_ = offset + k * self._strides[axis] // self.itemsize - s = _repr_r(s, axis+1, offset_) - if k_index < self.shape[axis] - 1: - s += ', ' - s += ']' - else: - r = repr(self.data[offset]) - if '.' in r: - r = ' ' + r - if r.endswith('.0'): - r = r[:-1] - s += r - return s - - s = _repr_r('', 0, self._offset) - if self.dtype != 'float64' and self.dtype != 'int32': - return "array(" + s + ", dtype='%s')" % self.dtype + s = self._repr_r('', 0, self._offset) + if self._dtype not in {'float64', 'int32'}: + return f"array({s}, dtype='{self._dtype}')" else: - return "array(" + s + ")" + return f"array({s})" def __eq__(self, other): if other.__module__.split('.')[0] == 'numpy': @@ -1156,11 +1211,12 @@ def T(self): @property def flags(self): c_cont = _get_step(self) == 1 - return {'C_CONTIGUOUS': c_cont, - 'F_CONTIGUOUS': (c_cont and self.ndim < 2), - 'OWNDATA': (self._base is None), + f_cont = _get_step(self) == 1 + return {'C_CONTIGUOUS': (c_cont and not self._asfortranarray), + 'F_CONTIGUOUS': (f_cont and self.ndim <=1 or self._asfortranarray), + 'OWNDATA': self._base is None, 'WRITEABLE': self._flags_bool, - 'ALIGNED': c_cont, + 'ALIGNED': True, 'WRITEBACKIFCOPY': False} @flags.setter @@ -1168,6 +1224,10 @@ def flags(self, value): if isinstance(value, dict): if 'WRITEABLE' in value: self._flags_bool = value['WRITEABLE'] + if 'F_CONTIGUOUS' in value: + self._asfortranarray = value['F_CONTIGUOUS'] + if 'C_CONTIGUOUS' in value: + self._asfortranarray = not value['C_CONTIGUOUS'] if 'WRITEBACKIFCOPY' in value and value['WRITEBACKIFCOPY'] == True: raise ValueError("can't set WRITEBACKIFCOPY to True") @@ -1216,14 +1276,13 @@ def reshape(self, newshape): return out def transpose(self): - # Numpy returns a view, but we cannot do that since we do not - # support Fortran ordering + ndim = self.ndim if ndim < 2: return self.view() shape = self.shape[::-1] out = empty(shape, self.dtype) - # + if ndim == 2: for i in range(self.shape[0]): out[:, i] = self[i, :]