Skip to content

Commit

Permalink
filter work
Browse files Browse the repository at this point in the history
  • Loading branch information
jweinst1 committed Nov 27, 2021
1 parent 0906882 commit 0468c4e
Show file tree
Hide file tree
Showing 4 changed files with 260 additions and 0 deletions.
16 changes: 16 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,19 @@ python types, such as with ``as_bytes()``
File "<stdin>", line 1, in <module>
simd.SimdError: start: '40', is out of bounds for vector of size 16
The data inside a vector can also be retrieved as a collection type, such as a ``tuple``:
.. code:: py
>>> a = simd.Vec(size=32, repeat_value=5, repeat_size=4)
>>> a
[5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0]
>>> a.as_tuple(type=int, width=4)
(5, 5, 5, 5, 5, 5, 5, 5)
>>> a.as_tuple(type=int, width=1)
(5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0)
>>> a.as_tuple(type=int, width=8)
(21474836485, 21474836485, 21474836485, 21474836485)
The above example shows that the plain ``__repr__`` method of ``Vec`` only depicts a hexadecimal, byte-level representation of the vector data, whereas a method like ``as_tuple`` allows the data to be viewed as different types. One unique aspect of the ``simd`` module is that it treats data and memory similarly to C, where a chunk of 16 bytes could be two 64-bit integers, four 32-bit integers, and so on.
121 changes: 121 additions & 0 deletions include/simd_vec_filter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#ifndef PYSIMD_VEC_FILTER_H
#define PYSIMD_VEC_FILTER_H

#include "simd_vec_type.h"
#include "vec_macros.h"

/*
 * Filters the vector in place, treating its bytes as an array of 32-bit ints.
 *
 * An element is kept when it satisfies every predicate that was supplied:
 *   gt != NULL  ->  element >  *gt
 *   lt != NULL  ->  element <  *lt
 *   eq != NULL  ->  element == *eq
 * A NULL predicate pointer means "no constraint"; with all three NULL every
 * element is kept and the data is left unchanged.
 *
 * Kept elements are compacted to the front of vec->data in their original
 * order and the element slots freed by the compaction are set to zero.
 * vec->size is not modified, and bytes after the last whole 32-bit element
 * (when vec->size is not a multiple of 4) are left untouched.
 *
 * NOTE(review): the number of kept elements is not reported to the caller, so
 * a kept element whose value is 0 cannot be told apart from the zero padding.
 * NOTE(review): assumes `int` is 32 bits wide, as the SSE2 epi32 path requires.
 *
 * Always returns 1.
 */
static int pysimd_vec_filter_32(struct pysimd_vec_t* vec, int* gt,
                                int* lt,
                                int* eq) {
    unsigned char* reader = vec->data;
    unsigned char* writer = vec->data;
    const unsigned char* data_end;

    if (reader == NULL) {
        return 1;
    }
    data_end = reader + vec->size;

#if defined(PYSIMD_X86_SSE2)
    {
        /* The comparison operands never change, so broadcast them once. */
        const __m128i all_ones = _mm_set1_epi32(-1);
        const __m128i gtnum = _mm_set1_epi32(gt != NULL ? *gt : 0);
        const __m128i ltnum = _mm_set1_epi32(lt != NULL ? *lt : 0);
        const __m128i eqnum = _mm_set1_epi32(eq != NULL ? *eq : 0);

        while ((size_t)(data_end - reader) >= 16) {
            /* writer may sit at any multiple of 4 bytes, and the buffer's
             * alignment is not visible here, so use unaligned load/store. */
            const __m128i loaded = _mm_loadu_si128((const __m128i*)reader);
            __m128i mask = all_ones;
            __m128i kept;
            int mask_result;
            size_t kept_bytes = 0;

            if (gt != NULL) {
                mask = _mm_and_si128(mask, _mm_cmpgt_epi32(loaded, gtnum));
            }
            if (lt != NULL) {
                mask = _mm_and_si128(mask, _mm_cmplt_epi32(loaded, ltnum));
            }
            if (eq != NULL) {
                mask = _mm_and_si128(mask, _mm_cmpeq_epi32(loaded, eqnum));
            }

            /* Rejected lanes become zero; surviving lanes keep their value. */
            kept = _mm_and_si128(mask, loaded);

            /* Take the bit pattern from the comparison mask, not from the data:
             * each lane of the mask is all-ones or all-zeros, so exactly the 16
             * nibble patterns handled below can occur. */
            mask_result = _mm_movemask_epi8(mask);

            /* _MM_SHUFFLE(d3, d2, d1, d0): destination lane k takes source lane dk.
             * Each case moves the surviving lanes to the front, in order. */
            switch (mask_result) {
                case 0xFFFF: /* lanes 0,1,2,3 */
                    kept_bytes = 16;
                    break;
                case 0x0FFF: /* lanes 0,1,2 */
                    kept_bytes = 12;
                    break;
                case 0x00FF: /* lanes 0,1 */
                    kept_bytes = 8;
                    break;
                case 0x000F: /* lane 0 */
                    kept_bytes = 4;
                    break;
                case 0x0: /* nothing survives */
                    kept_bytes = 0;
                    break;
                case 0xFF0F: /* lanes 0,2,3 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(1, 3, 2, 0));
                    kept_bytes = 12;
                    break;
                case 0xF0FF: /* lanes 0,1,3 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(2, 3, 1, 0));
                    kept_bytes = 12;
                    break;
                case 0xFFF0: /* lanes 1,2,3 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(0, 3, 2, 1));
                    kept_bytes = 12;
                    break;
                case 0xF00F: /* lanes 0,3 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(2, 1, 3, 0));
                    kept_bytes = 8;
                    break;
                case 0x0FF0: /* lanes 1,2 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(3, 0, 2, 1));
                    kept_bytes = 8;
                    break;
                case 0xF0F0: /* lanes 1,3 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(2, 0, 3, 1));
                    kept_bytes = 8;
                    break;
                case 0x0F0F: /* lanes 0,2 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(3, 1, 2, 0));
                    kept_bytes = 8;
                    break;
                case 0xFF00: /* lanes 2,3 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(1, 0, 3, 2));
                    kept_bytes = 8;
                    break;
                case 0xF000: /* lane 3 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(2, 1, 0, 3));
                    kept_bytes = 4;
                    break;
                case 0x0F00: /* lane 2 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(3, 1, 0, 2));
                    kept_bytes = 4;
                    break;
                case 0x00F0: /* lane 1 */
                    kept = _mm_shuffle_epi32(kept, _MM_SHUFFLE(3, 2, 0, 1));
                    kept_bytes = 4;
                    break;
                default:
                    fprintf(stderr, "Got impossible mask value: 0x%x, aborting ...\n", (unsigned int)mask_result);
                    abort();
            }

            /* writer <= reader, so this 16-byte store only covers bytes that
             * have already been loaded; unread input is never clobbered. */
            _mm_storeu_si128((__m128i*)writer, kept);
            reader += 16;
            writer += kept_bytes;
        }
    }
#endif

    /* Scalar path: the whole vector on non-SSE2 builds, or whatever is left
     * after the last full 16-byte chunk on SSE2 builds. */
    while ((size_t)(data_end - reader) >= sizeof(int)) {
        const int value = *(const int*)reader;
        const int keep = (gt == NULL || value > *gt)
                      && (lt == NULL || value < *lt)
                      && (eq == NULL || value == *eq);
        if (keep) {
            *(int*)writer = value;
            writer += sizeof(int);
        }
        reader += sizeof(int);
    }

    /* Zero the element slots that were vacated by the compaction. */
    while (writer < reader) {
        *writer++ = 0;
    }
    return 1;
}

#endif // PYSIMD_VEC_FILTER_H
11 changes: 11 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@

DEFAULT_COMPILER = get_default_compiler()

# This value is the minimum alignment (in bytes) required for the size of a simd.Vec object.
# The intention is that this minimum allows any available SIMD instruction to be executed
# on a vector object without needing to check its length/size.
pysimd_minimum_align = 8

pysimd_patch_version = 4
pysimd_minor_version = 0
pysimd_major_version = 0
Expand Down Expand Up @@ -62,6 +67,7 @@
""") as sse2_test:
if sse2_test.works:
macro_defs.append(('PYSIMD_X86_SSE2', '1'))
pysimd_minimum_align = 16

with CheckCCompiles("sse3", x86_header_string + """
int main(void) {
Expand Down Expand Up @@ -108,6 +114,7 @@
""") as avx_test:
if avx_test.works:
macro_defs.append(('PYSIMD_X86_AVX', '1'))
pysimd_minimum_align = 32
if DEFAULT_COMPILER == 'unix':
compiler_flags.append('-mavx')

Expand All @@ -123,6 +130,7 @@
""") as avx2_test:
if avx2_test.works:
macro_defs.append(('PYSIMD_X86_AVX2', '1'))
pysimd_minimum_align = 32
if DEFAULT_COMPILER == 'unix':
compiler_flags.append('-mavx2')

Expand All @@ -142,9 +150,12 @@
""") as avx512f_test:
if avx512f_test.works:
macro_defs.append(('PYSIMD_X86_AVX512F', '1'))
pysimd_minimum_align = 64
if DEFAULT_COMPILER == 'unix':
compiler_flags.append('-mavx512f')

macro_defs.append(('PYSIMD_MIN_ALIGN', str(pysimd_minimum_align)))

if os.name == 'nt':
macro_defs.append(('_CRT_SECURE_NO_WARNINGS', '1'))

Expand Down
112 changes: 112 additions & 0 deletions src/pymain.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "core_simd_info.h"
#include "simd_vec.h"
#include "simd_vec_arith.h"
#include "simd_vec_filter.h"
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "structmember.h"
Expand Down Expand Up @@ -329,6 +330,114 @@ SimdObject_as_bytes(SimdObject *self, PyObject *args, PyObject *kwargs)

}

/*
 * Creates the Python object for element `index` of `data` when the raw bytes
 * are viewed as an array of `width`-byte values.
 *
 * as_float == 0: signed integers of 1, 2, 4 or 8 bytes -> Python int.
 * as_float != 0: 4-byte float or 8-byte double         -> Python float.
 *
 * The caller must have validated `width` for the requested kind.
 * Returns a new reference, or NULL if the Python object could not be created.
 */
static PyObject*
SimdObject_as_tuple_item(const void* data, size_t index, size_t width, int as_float)
{
    if (as_float) {
        if (width == 4) {
            return PyFloat_FromDouble((double)((const float*)data)[index]);
        }
        return PyFloat_FromDouble(((const double*)data)[index]);
    }
    switch (width) {
        case 1:
            /* Explicitly signed: plain `char` has implementation-defined
             * signedness, while every other width here is read as signed. */
            return PyLong_FromLong(((const signed char*)data)[index]);
        case 2:
            return PyLong_FromLong(((const short*)data)[index]);
        case 4:
            return PyLong_FromLong(((const int*)data)[index]);
        default:
            return PyLong_FromLongLong(((const long long*)data)[index]);
    }
}

/*
 * Vec.as_tuple(type=int, width=4)
 *
 * Returns a tuple holding the vector's data reinterpreted as consecutive
 * `width`-byte values of the given Python type.
 *
 *   type  - the `int` or `float` type object (compared by identity).
 *   width - element size in bytes: 1, 2, 4 or 8 for int; 4 or 8 for float.
 *
 * Both arguments are optional and default to 32-bit integers. The tuple has
 * vec.size / width members; trailing bytes that do not form a whole element
 * are ignored.
 *
 * Raises SimdError for an unsupported width or type, and SystemError if an
 * element object cannot be created. Returns NULL with an exception set on
 * any failure.
 */
static PyObject*
SimdObject_as_tuple(SimdObject *self, PyObject *args, PyObject *kwargs)
{
    static char *kwlist[] = {"type", "width", NULL};
    PyObject* tuple_to_give = NULL;
    PyObject* param_type = (PyObject*)&PyLong_Type;
    Py_ssize_t param_width = 4;
    int as_float = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|On", kwlist,
                                     &param_type, &param_width)) {
        return NULL;
    }

    /* Validate on the signed value so a negative width is reported as given
     * instead of as a huge unsigned number. */
    if (param_width != 1 && param_width != 2 && param_width != 4 && param_width != 8) {
        PyErr_Format(SimdError, "The width '%zd' is not supported for method 'as_tuple'", param_width);
        return NULL;
    }
    const size_t actual_width = (size_t)param_width;

    if ((PyTypeObject*)param_type == &PyLong_Type) {
        as_float = 0;
    } else if ((PyTypeObject*)param_type == &PyFloat_Type) {
        if (actual_width != 4 && actual_width != 8) {
            PyErr_Format(SimdError, "The width '%zu' is not supported for floats for 'as_tuple'", actual_width);
            return NULL;
        }
        as_float = 1;
    } else {
        /* When a type object such as `str` is passed, name that type itself;
         * its own ob_type is just `type`. For any other object, name its type. */
        const char* given_name = PyType_Check(param_type)
            ? ((PyTypeObject*)param_type)->tp_name
            : param_type->ob_type->tp_name;
        PyErr_Format(SimdError, "The type '%s' is not supported for method 'as_tuple'", given_name);
        return NULL;
    }

    const size_t n_members = self->vec.size / actual_width;
    tuple_to_give = PyTuple_New((Py_ssize_t)n_members);
    if (tuple_to_give == NULL) {
        /* PyTuple_New has already set the exception. */
        return NULL;
    }

    for (size_t i = 0; i < n_members; ++i) {
        PyObject* to_put = SimdObject_as_tuple_item(self->vec.data, i, actual_width, as_float);
        if (to_put == NULL) {
            Py_DECREF(tuple_to_give);
            PyErr_Format(PyExc_SystemError, "Internal object failure line: %u", (unsigned int)__LINE__);
            return NULL;
        }
        /* PyTuple_SET_ITEM steals the reference to to_put. */
        PyTuple_SET_ITEM(tuple_to_give, (Py_ssize_t)i, to_put);
    }
    return tuple_to_give;
}

static PyObject *
SimdObject_clear(SimdObject *self, PyObject *Py_UNUSED(ignored))
{
Expand Down Expand Up @@ -362,6 +471,9 @@ static PyMethodDef SimdObject_methods[] = {
{"as_bytes", (PyCFunction) SimdObject_as_bytes, METH_VARARGS | METH_KEYWORDS,
"Returns a bytes object representing the internal bytes of the vector"
},
{"as_tuple", (PyCFunction) SimdObject_as_tuple, METH_VARARGS | METH_KEYWORDS,
"Returns a tuple populated with members of the vector, defaults to 32 bit integers"
},
{"copy", (PyCFunction) SimdObject_copy, METH_VARARGS | METH_KEYWORDS,
"Returns a copy of the vector"
},
Expand Down

0 comments on commit 0468c4e

Please sign in to comment.