Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overlap and Add algorithm for fast convolution of long array #695

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 149 additions & 0 deletions code/scipy/signal/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* 2020 Scott Shawcroft for Adafruit Industries
* 2020-2021 Zoltán Vörös
* 2020 Taku Fukada
* 2024 Edouard Leroy (oaconvolve implementation)
*/

#include <math.h>
Expand All @@ -19,6 +20,8 @@
#include "../../ulab.h"
eleroy marked this conversation as resolved.
Show resolved Hide resolved
#include "../../ndarray.h"
#include "../../numpy/carray/carray_tools.h"
#include "../../numpy/fft/fft_tools.h"
#include "../../ulab_tools.h"

#if ULAB_SCIPY_SIGNAL_HAS_SOSFILT & ULAB_MAX_DIMS > 1
static void signal_sosfilt_array(mp_float_t *x, const mp_float_t *coeffs, mp_float_t *zf, const size_t len) {
Expand Down Expand Up @@ -120,11 +123,157 @@ mp_obj_t signal_sosfilt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_ar
MP_DEFINE_CONST_FUN_OBJ_KW(signal_sosfilt_obj, 2, signal_sosfilt);
#endif /* ULAB_SCIPY_SIGNAL_HAS_SOSFILT */

#if ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE

mp_obj_t signal_oaconvolve(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
static const mp_arg_t allowed_args[] = {
{MP_QSTR_a, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE}},
{MP_QSTR_v, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE}},
};

mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);

if (!mp_obj_is_type(args[0].u_obj, &ulab_ndarray_type) || !mp_obj_is_type(args[1].u_obj, &ulab_ndarray_type)) {
mp_raise_TypeError(MP_ERROR_TEXT("oa convolve arguments must be ndarrays"));
}

ndarray_obj_t *a = MP_OBJ_TO_PTR(args[0].u_obj);
ndarray_obj_t *c = MP_OBJ_TO_PTR(args[1].u_obj);
// deal with linear arrays only
#if ULAB_MAX_DIMS > 1
if ((a->ndim != 1) || (c->ndim != 1)) {
mp_raise_TypeError(MP_ERROR_TEXT("convolve arguments must be linear arrays"));
}
#endif
size_t signal_len = a->len;
size_t kernel_len = c->len;
if (signal_len == 0 || kernel_len == 0) {
mp_raise_TypeError(MP_ERROR_TEXT("convolve arguments must not be empty"));
}



size_t segment_len = kernel_len; // Min segment size, at least size of kernel
size_t fft_size = 1;
while (fft_size < segment_len + kernel_len - 1) {
fft_size *= 2; // Find smallest power of 2 for fft size
}
segment_len = fft_size - kernel_len + 1; // Final segment size
size_t output_len = signal_len + kernel_len - 1;
uint8_t dtype = NDARRAY_FLOAT;
#if ULAB_SUPPORTS_COMPLEX
if ((a->dtype == NDARRAY_COMPLEX) || (c->dtype == NDARRAY_COMPLEX)) {
dtype = NDARRAY_COMPLEX;
}
#endif

uint8_t sz = 2 * sizeof(mp_float_t);

// Buffer allocation
mp_float_t *kernel_fft_array = m_new0(mp_float_t, fft_size * 2);
mp_float_t *segment_fft_array = m_new0(mp_float_t, fft_size * 2);
ndarray_obj_t *output = ndarray_new_linear_array(output_len, dtype);
mp_float_t *output_array = (mp_float_t *)output->array;
uint8_t *kernel_array = (uint8_t *)c->array;
uint8_t *signal_array = (uint8_t *)a->array;

// Copy kernel data
if (c->dtype == NDARRAY_COMPLEX) {
for (size_t i = 0; i < kernel_len; i++){
memcpy(kernel_fft_array, kernel_array, sz);
kernel_fft_array += 2;
kernel_array += c->strides[ULAB_MAX_DIMS - 1];
}
}
else {
mp_float_t (*func)(void *) = ndarray_get_float_function(c->dtype);
for (size_t i = 0; i < kernel_len; i++) {
// real part; the imaginary part is 0, no need to assign
*kernel_fft_array = func(kernel_array);
kernel_fft_array += 2;
kernel_array += c->strides[ULAB_MAX_DIMS - 1];
}
}
kernel_fft_array -= 2 * kernel_len;

// Compute kernel fft in place
fft_kernel(kernel_fft_array, fft_size, 1);

mp_float_t real, imag; // For complex multiplication
size_t current_segment_size = segment_len;
mp_float_t (*funca)(void *) = ndarray_get_float_function(a->dtype);

for (size_t i = 0; i < signal_len; i += segment_len) {
// Check if remaining data is less than segment length
if (i + segment_len > signal_len) {
current_segment_size = signal_len - i;
}
// Load segment in buffer
memset(segment_fft_array, 0, fft_size * sz); // Reset buffer to zero
if (a->dtype == NDARRAY_COMPLEX) {
for (size_t k = 0; k < current_segment_size; k++){
memcpy(segment_fft_array, signal_array, sz);
segment_fft_array += 2;
signal_array += a->strides[ULAB_MAX_DIMS - 1];
}
}
else {
for (size_t k = 0; k < current_segment_size; k++) {
// real part; the imaginary part is 0, no need to assign
*segment_fft_array = funca(signal_array);
segment_fft_array += 2;
signal_array += a->strides[ULAB_MAX_DIMS - 1];
}
}
segment_fft_array -= 2 * current_segment_size;

// Compute segment fft in place
fft_kernel(segment_fft_array, fft_size, 1);

// Product of kernel and segment fft (complex multiplication)
for (size_t j = 0; j < fft_size; j++) {
real = segment_fft_array[j * 2] * kernel_fft_array[j * 2] - segment_fft_array[j * 2 + 1] * kernel_fft_array[j * 2 + 1];
imag = segment_fft_array[j * 2] * kernel_fft_array[j * 2 + 1] + segment_fft_array[j * 2 + 1] * kernel_fft_array[j * 2];
segment_fft_array[j * 2] = real;
segment_fft_array[j * 2 + 1] = imag;
}
// Inverse FFT in place
fft_kernel(segment_fft_array, fft_size, -1);

#if ULAB_SUPPORTS_COMPLEX
if (dtype == NDARRAY_COMPLEX) {
// Overlap, Add and normalized inverse fft
for (size_t j = 0; j < fft_size * 2; j++) {
if ((i * 2 + j) < (output_len * 2)) {
output_array[i * 2 + j] += (segment_fft_array[j] / fft_size);
}
}
continue;
}
#endif


for (size_t j = 0; j < fft_size; j++) {
if ((i + j) < (output_len)) { // adds only real part
output_array[i + j] += (segment_fft_array[j * 2] / fft_size);
}
}
}
return MP_OBJ_FROM_PTR(output);
}
MP_DEFINE_CONST_FUN_OBJ_KW(signal_oaconvolve_obj, 2, signal_oaconvolve);
#endif /* ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE */


static const mp_rom_map_elem_t ulab_scipy_signal_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_signal) },
#if ULAB_SCIPY_SIGNAL_HAS_SOSFILT & ULAB_MAX_DIMS > 1
{ MP_ROM_QSTR(MP_QSTR_sosfilt), MP_ROM_PTR(&signal_sosfilt_obj) },
#endif
#if ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE
{ MP_ROM_QSTR(MP_QSTR_oaconvolve), MP_ROM_PTR(&signal_oaconvolve_obj) },
#endif
};

static MP_DEFINE_CONST_DICT(mp_module_ulab_scipy_signal_globals, ulab_scipy_signal_globals_table);
Expand Down
1 change: 1 addition & 0 deletions code/scipy/signal/signal.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@
extern const mp_obj_module_t ulab_scipy_signal_module;

MP_DECLARE_CONST_FUN_OBJ_KW(signal_sosfilt_obj);
MP_DECLARE_CONST_FUN_OBJ_KW(signal_oaconvolve_obj);

#endif /* _SCIPY_SIGNAL_ */
2 changes: 1 addition & 1 deletion code/ulab.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "user/user.h"
#include "utils/utils.h"

#define ULAB_VERSION 6.6.1
#define ULAB_VERSION 6.6.2
#define xstr(s) str(s)
#define str(s) #s

Expand Down
4 changes: 4 additions & 0 deletions code/ulab.h
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,10 @@
#define ULAB_SCIPY_SIGNAL_HAS_SOSFILT (1)
#endif

#ifndef ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE
#define ULAB_SCIPY_SIGNAL_HAS_OACONVOLVE (1)
#endif

#ifndef ULAB_SCIPY_HAS_OPTIMIZE_MODULE
#define ULAB_SCIPY_HAS_OPTIMIZE_MODULE (1)
#endif
Expand Down
36 changes: 35 additions & 1 deletion docs/manual/source/scipy-signal.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
scipy.signal
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The documentation is actually generated from this: https://github.com/v923z/micropython-ulab/blob/master/docs/scipy-signal.ipynb. The jupyter notebook allows you to run micropython code directly, so you don't have to copy anything.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ha ok, I made the changes in the jupyter notebook but I'm not sure how to run it (my system is windows).

============

This module defines the single function:
This module defines the functions:

1. `scipy.signal.sosfilt <#sosfilt>`__
2. `scipy.signal.oaconvolve <#oaconvolve>`__

sosfilt
-------
Expand Down Expand Up @@ -64,6 +65,39 @@ initial values are assumed to be 0.
========================================
zf: array([[37242.0, 74835.0],
[1026187.0, 1936542.0]], dtype=float)

oaconvolve
-------

``scipy``:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.oaconvolve.html

Convolve two N-dimensional arrays using the overlap-add method.

Convolve in1 and in2 using the overlap-add method. Similarly to numpy.convolve,
this method works in full mode and other modes can be obtained by slicing the result?

This is generally much faster than linear convolve for large arrays (n > ~500),
and generally much faster than fftconvolve (not implemented yet in ulab) when one array is much larger
than the other (e.g. for a matched filter algorithm), but can be slower when only a few output values
are needed or when the arrays are very similar in shape, and can only output float arrays (int or object array inputs will be cast to float).

.. code::

# code to be run in micropython

from ulab import numpy as np
from ulab import scipy as spy

x = np.array((1,2,3))
y = np.array((1,10,100,1000))
result = spy.signal.oaconvolve(x, y)
print('result: ', result)

.. parsed-literal::

result: array([1.0, 12.00024, 123.0001, 1230.0, 2300.0, 3000.0], dtype=float32)




Loading
Loading