From 68017a2238ddd01f40c3d8b78997c29dd9a8073e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Fri, 19 Jul 2024 20:04:20 +0200
Subject: [PATCH] try to reduce firmware size

---
 code/ndarray.c         | 40 ++++++++++++++++++++++++++--------------
 code/ndarray.h         |  2 +-
 code/numpy/create.c    | 15 +++++++++++++--
 code/numpy/numerical.c |  2 +-
 4 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/code/ndarray.c b/code/ndarray.c
index bae6d97c..4e6ecd95 100644
--- a/code/ndarray.c
+++ b/code/ndarray.c
@@ -509,7 +509,7 @@ static size_t multiply_size(size_t a, size_t b) {
     return result;
 }
 
-ndarray_obj_t *ndarray_new_ndarray(uint8_t ndim, size_t *shape, int32_t *strides, uint8_t dtype, uint8_t *buffer) {
+ndarray_obj_t *ndarray_new_ndarray(uint8_t ndim, size_t *shape, int32_t *strides, uint8_t dtype) {
     // Creates the base ndarray with shape, and initialises the values to straight 0s
     ndarray_obj_t *ndarray = m_new_obj(ndarray_obj_t);
     ndarray->base.type = &ulab_ndarray_type;
@@ -536,10 +536,7 @@ ndarray_obj_t *ndarray_new_ndarray(uint8_t ndim, size_t *shape, int32_t *strides
 
     // if the length is 0, still allocate a single item, so that contractions can be handled
     size_t len = multiply_size(ndarray->itemsize, MAX(1, ndarray->len));
-    uint8_t *array = buffer;
-    if(buffer == NULL) {
-        array = m_new0(byte, len);
-    }
+    uint8_t *array = m_new0(byte, len);
     // this should set all elements to 0, irrespective of the of the dtype (all bits are zero)
     // we could, perhaps, leave this step out, and initialise the array only, when needed
     ndarray->array = array;
@@ -555,24 +552,39 @@ ndarray_obj_t *ndarray_new_dense_ndarray(uint8_t ndim, size_t *shape, uint8_t dt
     // for(size_t i = ULAB_MAX_DIMS; i > 1; i--) {
     //     strides[i-2] = strides[i-1] * MAX(1, shape[i-1]);
     // }
-    return ndarray_new_ndarray(ndim, shape, NULL, dtype, NULL);
+    return ndarray_new_ndarray(ndim, shape, NULL, dtype);
 }
 
 ndarray_obj_t *ndarray_ndarray_from_buffer(uint8_t ndim, size_t *shape, uint8_t dtype, mp_obj_t inbuffer, size_t offset) {
     // creates a dense array from a buffer
 
-    uint8_t sz = ulab_binary_get_size(dtype);
-    size_t len = sz; 
-    for(uint8_t i = ULAB_MAX_DIMS; i > ULAB_MAX_DIMS - ndim; i--) {
-        len = multiply_size(len, shape[i - 1]);
-    }
     mp_buffer_info_t bufinfo;
     if(mp_get_buffer(inbuffer, &bufinfo, MP_BUFFER_READ)) {
+        ndarray_obj_t *ndarray = m_new_obj(ndarray_obj_t);
+        ndarray->base.type = &ulab_ndarray_type;
+        ndarray->dtype = dtype == NDARRAY_BOOL ? NDARRAY_UINT8 : dtype;
+        ndarray->boolean = dtype == NDARRAY_BOOL ? NDARRAY_BOOLEAN : NDARRAY_NUMERIC;
+        ndarray->ndim = ndim;
+        ndarray->len = ndim == 0 ? 0 : 1;
+        ndarray->itemsize = ulab_binary_get_size(dtype);
+        int32_t *_strides = strides_from_shape(shape, ndarray->dtype);
+        for(uint8_t i=ULAB_MAX_DIMS; i > ULAB_MAX_DIMS-ndim; i--) {
+            ndarray->shape[i - 1] = shape[i - 1];
+            ndarray->strides[i - 1] = _strides[i - 1];
+            ndarray->len = multiply_size(ndarray->len, shape[i - 1]);
+        }
+
+        if (SIZE_MAX / ndarray->itemsize <= ndarray->len) {
+            mp_raise_ValueError(MP_ERROR_TEXT("ndarray length overflows"));
+        }
+        size_t len = multiply_size(ndarray->itemsize, MAX(1, ndarray->len));
         if(len > (bufinfo.len - offset)) {
             mp_raise_ValueError(MP_ERROR_TEXT("buffer size is not compatible with shape"));
         }
         uint8_t *buffer = bufinfo.buf;
-        return ndarray_new_ndarray(ndim, shape, NULL, dtype, buffer + offset);
+        ndarray->array = buffer + offset;
+        ndarray->origin = buffer + offset;
+        return ndarray;
     } else {
         mp_raise_ValueError(MP_ERROR_TEXT("can't read from input buffer"));
     }
@@ -673,7 +685,7 @@ ndarray_obj_t *ndarray_copy_view(ndarray_obj_t *source) {
     if(source->boolean) {
         dtype = NDARRAY_BOOL;
     }
-    ndarray_obj_t *ndarray = ndarray_new_ndarray(source->ndim, source->shape, strides, dtype, NULL);
+    ndarray_obj_t *ndarray = ndarray_new_ndarray(source->ndim, source->shape, strides, dtype);
     ndarray_copy_array(source, ndarray, 0);
     return ndarray;
 }
@@ -1907,7 +1919,7 @@ mp_obj_t ndarray_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
             #if ULAB_SUPPORTS_COMPLEX
             if(self->dtype == NDARRAY_COMPLEX) {
                 int32_t *strides = strides_from_shape(self->shape, NDARRAY_FLOAT);
-                ndarray_obj_t *target = ndarray_new_ndarray(self->ndim, self->shape, strides, NDARRAY_FLOAT, NULL);
+                ndarray_obj_t *target = ndarray_new_ndarray(self->ndim, self->shape, strides, NDARRAY_FLOAT);
                 ndarray = MP_OBJ_TO_PTR(carray_abs(self, target));
             } else {
             #endif
diff --git a/code/ndarray.h b/code/ndarray.h
index 877cec28..d16a06cd 100644
--- a/code/ndarray.h
+++ b/code/ndarray.h
@@ -189,7 +189,7 @@ ndarray_obj_t *ndarray_from_iterable(mp_obj_t , uint8_t );
 ndarray_obj_t *ndarray_new_dense_ndarray(uint8_t , size_t *, uint8_t );
 ndarray_obj_t *ndarray_ndarray_from_buffer(uint8_t , size_t *, uint8_t , mp_obj_t , size_t );
 ndarray_obj_t *ndarray_new_ndarray_from_tuple(mp_obj_tuple_t *, uint8_t );
-ndarray_obj_t *ndarray_new_ndarray(uint8_t , size_t *, int32_t *, uint8_t , uint8_t *);
+ndarray_obj_t *ndarray_new_ndarray(uint8_t , size_t *, int32_t *, uint8_t );
 ndarray_obj_t *ndarray_new_linear_array(size_t , uint8_t );
 ndarray_obj_t *ndarray_new_view(ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t );
 bool ndarray_is_dense(ndarray_obj_t *);
diff --git a/code/numpy/create.c b/code/numpy/create.c
index fac9fd16..f427e374 100644
--- a/code/numpy/create.c
+++ b/code/numpy/create.c
@@ -838,8 +838,23 @@ mp_obj_t create_frombuffer(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
                 len = count;
             }
         }
-        size_t *shape = ndarray_shape_vector(0, 0, 0, len);
-        return ndarray_ndarray_from_buffer(1, shape, dtype, args[0].u_obj, offset);
+        // assemble the 1-dimensional ndarray header by hand; the data pointer
+        // aliases the input buffer, so no element is copied
+        ndarray_obj_t *ndarray = m_new_obj(ndarray_obj_t);
+        ndarray->base.type = &ulab_ndarray_type;
+        ndarray->dtype = dtype == NDARRAY_BOOL ? NDARRAY_UINT8 : dtype;
+        ndarray->boolean = dtype == NDARRAY_BOOL ? NDARRAY_BOOLEAN : NDARRAY_NUMERIC;
+        ndarray->ndim = 1;
+        ndarray->len = len;
+        ndarray->itemsize = sz;
+        ndarray->shape[ULAB_MAX_DIMS - 1] = len;
+        ndarray->strides[ULAB_MAX_DIMS - 1] = sz;
+
+        uint8_t *buffer = bufinfo.buf;
+        ndarray->array = buffer + offset;
+        // origin must be set together with array, as in ndarray_ndarray_from_buffer
+        ndarray->origin = buffer + offset;
+        return MP_OBJ_FROM_PTR(ndarray);
     }
     return mp_const_none;
 }
diff --git a/code/numpy/numerical.c b/code/numpy/numerical.c
index 68cf44dd..e3b42525 100644
--- a/code/numpy/numerical.c
+++ b/code/numpy/numerical.c
@@ -746,7 +746,7 @@ mp_obj_t numerical_argsort(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
     numerical_reduce_axes(ndarray, ax, shape, strides);
 
     // We could return an NDARRAY_UINT8 array, if all lengths are shorter than 256
-    ndarray_obj_t *indices = ndarray_new_ndarray(ndarray->ndim, ndarray->shape, NULL, NDARRAY_UINT16, NULL);
+    ndarray_obj_t *indices = ndarray_new_ndarray(ndarray->ndim, ndarray->shape, NULL, NDARRAY_UINT16);
     int32_t *istrides = m_new0(int32_t, ULAB_MAX_DIMS);
     numerical_reduce_axes(indices, ax, shape, istrides);