v923z · eleroy · Nov 25, 2024 · Nov 26, 2024 · Nov 26, 2024 · Nov 27, 2024
diff --git a/code/scipy/signal/signal.c b/code/scipy/signal/signal.c
@@ -19,6 +19,8 @@
 #include "../../ulab.h"
 #include "../../ndarray.h"
 #include "../../numpy/carray/carray_tools.h"
+#include "../../numpy/fft/fft_tools.h" // For fft
+#include "../../ulab_tools.h"
 
 #if ULAB_SCIPY_SIGNAL_HAS_SOSFILT & ULAB_MAX_DIMS > 1
 static void signal_sosfilt_array(mp_float_t *x, const mp_float_t *coeffs, mp_float_t *zf, const size_t len) {
@@ -120,11 +122,180 @@ mp_obj_t signal_sosfilt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_ar
 MP_DEFINE_CONST_FUN_OBJ_KW(signal_sosfilt_obj, 2, signal_sosfilt);
 #endif /* ULAB_SCIPY_SIGNAL_HAS_SOSFILT */
 
+#if ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE
+
+mp_obj_t signal_oaconvolve(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args)
+{
+    static const mp_arg_t allowed_args[] = {
+        {MP_QSTR_a, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE}},
+        {MP_QSTR_v, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE}},
+    };
+
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if (!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type) || !mp_obj_is_type(args[1].u_obj, &ulab_ndarray_type))
+    {
+        mp_raise_TypeError(MP_ERROR_TEXT("oa convolve arguments must be ndarrays"));
+    }
+
+    ndarray_obj_t *a = MP_OBJ_TO_PTR(args[0].u_obj);
+    ndarray_obj_t *c = MP_OBJ_TO_PTR(args[1].u_obj);
+// deal with linear arrays only
+#if ULAB_MAX_DIMS > 1
+    if ((a->ndim != 1) || (c->ndim != 1))
+    {
+        mp_raise_TypeError(MP_ERROR_TEXT("convolve arguments must be linear arrays"));
+    }
+#endif
+    size_t signal_len = a->len;
+    size_t kernel_len = c->len;
+    if (signal_len == 0 || kernel_len == 0)
+    {
+        mp_raise_TypeError(MP_ERROR_TEXT("convolve arguments must not be empty"));
+    }
+
+
+
+    size_t segment_len = kernel_len; // Min segment size, at least size of kernel
+    size_t fft_size = 1;
+    while (fft_size < segment_len + kernel_len - 1)
+    {
+        fft_size *= 2; // Find smallest power of 2 for fft size
+    }
+    segment_len = fft_size - kernel_len + 1; // Final segment size
+    size_t output_len = signal_len + kernel_len - 1;
+    uint8_t dtype = NDARRAY_FLOAT;
+#if ULAB_SUPPORTS_COMPLEX
+    if ((a->dtype == NDARRAY_COMPLEX) || (c->dtype == NDARRAY_COMPLEX))
+    {
+        dtype = NDARRAY_COMPLEX;
+    }
+#endif
+
+    uint8_t sz = 2 * sizeof(mp_float_t);
+
+    // Buffer allocation
+    mp_float_t *kernel_fft_array = m_new0(mp_float_t, fft_size * 2);
+    mp_float_t *segment_fft_array = m_new0(mp_float_t, fft_size * 2);
+    ndarray_obj_t *output = ndarray_new_linear_array(output_len, dtype);
+    mp_float_t *output_array = (mp_float_t *)output->array;
+    uint8_t *kernel_array = (uint8_t *)c->array;
+    uint8_t *signal_array = (uint8_t *)a->array;
+
+    // Copy kernel data
+    if (c->dtype == NDARRAY_COMPLEX)
+    {
+        for (size_t i = 0; i < kernel_len; i++)
+        {
+            memcpy(kernel_fft_array, kernel_array, sz);
+            kernel_fft_array += 2;
+            kernel_array += c->strides[ULAB_MAX_DIMS - 1];
+        }
+    }
+    else
+    {
+        mp_float_t (*func)(void *) = ndarray_get_float_function(c->dtype);
+        for (size_t i = 0; i < kernel_len; i++)
+        {
+            // real part; the imaginary part is 0, no need to assign
+            *kernel_fft_array = func(kernel_array);
+            kernel_fft_array += 2;
+            kernel_array += c->strides[ULAB_MAX_DIMS - 1];
+        }
+    }
+    kernel_fft_array -= 2 * kernel_len;
+
+    // Compute kernel fft in place
+    fft_kernel(kernel_fft_array, fft_size, 1);
+
+    mp_float_t real, imag; // For complex multiplication
+    size_t current_segment_size = segment_len;
+    mp_float_t (*funca)(void *) = ndarray_get_float_function(a->dtype);
+
+    for (size_t i = 0; i < signal_len; i += segment_len)
+    {
+        // Check if remaining data is less than segment length
+        if (i + segment_len > signal_len)
+        {
+            current_segment_size = signal_len - i;
+        }
+        // Load segment in buffer
+        memset(segment_fft_array, 0, fft_size * sz); // Reset buffer to zero
+        if (a->dtype == NDARRAY_COMPLEX)
+        {
+            for (size_t k = 0; k < current_segment_size; k++)
+            {
+                memcpy(segment_fft_array, signal_array, sz);
+                segment_fft_array += 2;
+                signal_array += a->strides[ULAB_MAX_DIMS - 1];
+            }
+        }
+        else
+        {
+            for (size_t k = 0; k < current_segment_size; k++)
+            {
+                // real part; the imaginary part is 0, no need to assign
+                *segment_fft_array = funca(signal_array);
+                segment_fft_array += 2;
+                signal_array += a->strides[ULAB_MAX_DIMS - 1];
+            }
+        }
+        segment_fft_array -= 2 * current_segment_size;
+
+        // Compute segment fft in place
+        fft_kernel(segment_fft_array, fft_size, 1);
+
+        // Product of kernel and segment fft (complex multiplication)
+        for (size_t j = 0; j < fft_size; j++)
+        {
+            real = segment_fft_array[j * 2] * kernel_fft_array[j * 2] - segment_fft_array[j * 2 + 1] * kernel_fft_array[j * 2 + 1];
+            imag = segment_fft_array[j * 2] * kernel_fft_array[j * 2 + 1] + segment_fft_array[j * 2 + 1] * kernel_fft_array[j * 2];
+            segment_fft_array[j * 2] = real;
+            segment_fft_array[j * 2 + 1] = imag;
+        }
+        // Inverse FFT in place
+        fft_kernel(segment_fft_array, fft_size, -1);
+
+        #if ULAB_SUPPORTS_COMPLEX
+        if (dtype == NDARRAY_COMPLEX)
+        {
+            // Overlap, Add and normalized inverse fft
+            for (size_t j = 0; j < fft_size * 2; j++)
+            {
+                if ((i * 2 + j) < (output_len * 2))
+                {
+                    output_array[i * 2 + j] += (segment_fft_array[j] / fft_size);
+                }
+            }
+            continue;
+        }
+        #endif
+
+
+        for (size_t j = 0; j < fft_size; j++)
+        {
+            if ((i + j) < (output_len)) // adds only real part
+            {
+                output_array[i + j] += (segment_fft_array[j * 2] / fft_size);
+            }
+        }
+
+    }
+    return MP_OBJ_FROM_PTR(output);
+}
+MP_DEFINE_CONST_FUN_OBJ_KW(signal_oaconvolve_obj, 2, signal_oaconvolve);
+#endif /* ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE */
+
+
 static const mp_rom_map_elem_t ulab_scipy_signal_globals_table[] = {
     { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_signal) },
     #if ULAB_SCIPY_SIGNAL_HAS_SOSFILT & ULAB_MAX_DIMS > 1
         { MP_ROM_QSTR(MP_QSTR_sosfilt), MP_ROM_PTR(&signal_sosfilt_obj) },
     #endif
+    #if ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE
+        { MP_ROM_QSTR(MP_QSTR_oaconvolve), MP_ROM_PTR(&signal_oaconvolve_obj) },
+    #endif
 };
 
 static MP_DEFINE_CONST_DICT(mp_module_ulab_scipy_signal_globals, ulab_scipy_signal_globals_table);

diff --git a/code/scipy/signal/signal.h b/code/scipy/signal/signal.h
@@ -19,5 +19,6 @@
 extern const mp_obj_module_t ulab_scipy_signal_module;
 
 MP_DECLARE_CONST_FUN_OBJ_KW(signal_sosfilt_obj);
+MP_DECLARE_CONST_FUN_OBJ_KW(signal_oaconvolve_obj);
 
 #endif /* _SCIPY_SIGNAL_ */
diff --git a/code/ulab.c b/code/ulab.c
@@ -33,7 +33,7 @@
 #include "user/user.h"
 #include "utils/utils.h"
 
-#define ULAB_VERSION 6.6.1
+#define ULAB_VERSION 6.6.2
 #define xstr(s) str(s)
 #define str(s) #s
 

diff --git a/code/ulab.h b/code/ulab.h
@@ -740,6 +740,10 @@
 #define ULAB_SCIPY_SIGNAL_HAS_SOSFILT       (1)
 #endif
 
+#ifndef ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE
+#define ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE       (1)
+#endif
+
 #ifndef ULAB_SCIPY_HAS_OPTIMIZE_MODULE
 #define ULAB_SCIPY_HAS_OPTIMIZE_MODULE      (1)
 #endif

diff --git a/docs/manual/source/scipy-signal.rst b/docs/manual/source/scipy-signal.rst
@@ -2,9 +2,10 @@
 scipy.signal
 ============
 
-This module defines the single function:
+This module defines the functions:
 
 1. `scipy.signal.sosfilt <#sosfilt>`__
+2. `scipy.signal.oaconvolve <#oaconvolve>`__
 
 sosfilt
 -------
@@ -64,6 +65,39 @@ initial values are assumed to be 0.
     ========================================
     zf:  array([[37242.0, 74835.0],
     	 [1026187.0, 1936542.0]], dtype=float)
+
+oaconvolve
+-------
+
+``scipy``:
+https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.oaconvolve.html
+
+Convolve two N-dimensional arrays using the overlap-add method.
+
+Convolve in1 and in2 using the overlap-add method. Similarly to numpy.convolve, 
+this method works in full mode and other modes can be obtained by slicing the result?
+
+This is generally much faster than linear convolve for large arrays (n > ~500), 
+and generally much faster than fftconvolve (not implemented yet in ulab) when one array is much larger 
+than the other (e.g. for a matched filter algorithm), but can be slower when only a few output values 
+are needed or when the arrays are very similar in shape, and can only output float arrays (int or object array inputs will be cast to float).
+
+.. code::
+
+    # code to be run in micropython
+
+    from ulab import numpy as np
+    from ulab import scipy as spy
+
+    x = np.array((1,2,3))
+    y = np.array((1,10,100,1000))
+    result = spy.signal.oaconvolve(x, y)
+    print('result: ', result)
+
+.. parsed-literal::
+
+    result:  array([1.0, 12.00024, 123.0001, 1230.0, 2300.0, 3000.0], dtype=float32)
+
 
 
 
diff --git a/tests/1d/scipy/oaconvolve.py b/tests/1d/scipy/oaconvolve.py
@@ -0,0 +1,16 @@
+import math
+
+try:
+    from ulab import scipy, numpy as np
+except ImportError:
+    import scipy
+    import numpy as np
+
+x = np.array((1,2,3))
+y = np.array((1,10,100,1000))
+result = (scipy.signal.oaconvolve(x, y))
+ref_result = np.array([1, 12, 123, 1230, 2300, 3000],dtype=np.float)
+cmp_result = []
+for p,q in zip(list(result), list(ref_result)):
+    cmp_result.append(math.isclose(p, q, rel_tol=1e-06, abs_tol=5e-04))
+print(cmp_result)
diff --git a/tests/1d/scipy/oaconvolve.py.exp b/tests/1d/scipy/oaconvolve.py.exp
@@ -0,0 +1 @@
+[True, True, True, True, True, True]