diff --git a/.gitignore b/.gitignore index bddaf4c..4275cc3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ ## Ignore Visual Studio temporary files, build results, and ## files generated by popular Visual Studio add-ons. +.DS_Store + # User-specific files *.suo *.user diff --git a/README.rst b/README.rst index ab04888..e3d7518 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,10 @@ PySIMD ====== +.. image:: images/pysimd.png + :width: 800 + :alt: PySimd logo + ``simd`` is the python module for SIMD computing and programming. It prodives an extensive interface to SIMD instruction sets on several different architectures, and fallback scalar implementations when no SIMD instructions @@ -129,3 +133,26 @@ The data inside a vector can also be retrieved as a collection type, like a ``tu The above example shows the pure ``__repr__`` method of ``Vec`` only depicts a hexadecimal, byte level representation of the vector data, but a method like ``as_tuple`` allows the viewing of data with different types. One unique aspect of the ``simd`` module is it treats data and memory similar to that of C, where a chunk of 16 bytes could be two 64 bit integers, four 32 bit integers, and so on. + +Math +~~~~ + +The ``simd`` module supports SIMD operations that involve arithmetic and math on integers and floating-point numbers. Operations like ``add`` or ``sub`` take another vector and a ``width``. The ``width`` is the size, in bytes, of each data lane the SIMD instruction applies to, such as ``8`` for 64-bit operations. Here are a few examples: + +.. 
code:: py + + >>> v = simd.Vec(size=16, repeat_value=5, repeat_size=4) + >>> v2 = simd.Vec(size=16, repeat_value=10, repeat_size=4) + >>> v.add(v2) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: function missing required argument 'width' (pos 2) + >>> v.add(v2, width=4) + >>> v.as_tuple(type=int, width=4) + (15, 15, 15, 15) + >>> v.sub(v2, width=4) + >>> v.sub(v2, width=4) + >>> v.as_tuple(type=int, width=4) + (-5, -5, -5, -5) + + diff --git a/images/logo.png b/images/logo.png deleted file mode 100644 index a25f803..0000000 Binary files a/images/logo.png and /dev/null differ diff --git a/images/pysimd.png b/images/pysimd.png new file mode 100644 index 0000000..95c0751 Binary files /dev/null and b/images/pysimd.png differ diff --git a/include/simd_vec_filter.h b/include/simd_vec_filter.h index ed03e37..46ed58d 100644 --- a/include/simd_vec_filter.h +++ b/include/simd_vec_filter.h @@ -4,6 +4,7 @@ #include "simd_vec_type.h" #include "vec_macros.h" + static int pysimd_vec_filter_32(struct pysimd_vec_t* vec, int* gt, int* lt, int* eq) {
@@ -118,4 +119,96 @@ static int pysimd_vec_filter_32(struct pysimd_vec_t* vec, int* gt,
   return 1;
 }
 
+/**
+ * Filter the signed 64-bit lanes of `vec` in place.
+ *
+ * `gt`, `lt` and `eq` are optional (pass NULL to skip one). A lane survives
+ * only if it is greater than *gt, less than *lt and equal to *eq for every
+ * bound that was supplied; every other lane is zeroed. Within a 16-byte
+ * chunk, a surviving upper lane is moved down over a rejected lower lane.
+ *
+ * Without PYSIMD_X86_SSE2 the fallback ignores gt/lt/eq: it copies the
+ * non-zero bytes of vec->data to the front of a fresh zero-filled buffer of
+ * the same size and installs that buffer as vec->data.
+ *
+ * Returns 1 on success, or 0 when the fallback cannot allocate its buffer
+ * (vec is left untouched in that case).
+ */
+static int pysimd_vec_filter_64(struct pysimd_vec_t* vec, long long* gt,
+                                long long* lt,
+                                long long* eq) {
+#if defined(PYSIMD_X86_SSE2)
+  // NOTE(review): _mm_cmpgt_epi64 is SSE4.2 and _mm_cmpeq_epi64 is SSE4.1;
+  // confirm that builds defining PYSIMD_X86_SSE2 also enable those sets.
+  unsigned char* ptr = vec->data;
+  const unsigned char* ptr_end = ptr + vec->size;
+  // ptr may advance by less than a full chunk, so stop once fewer than 16
+  // bytes remain instead of loading/storing past the end of the buffer.
+  while ((size_t)(ptr_end - ptr) >= 16) {
+    // After a partial advance ptr is no longer 16-byte aligned, hence the
+    // unaligned load/store forms.
+    __m128i loaded = _mm_loadu_si128((__m128i const*)ptr);
+    __m128i mask = _mm_set1_epi8(0xff);
+    if (gt != NULL) {
+      // _mm_set1_epi64x takes a 64-bit integer; _mm_set1_epi64 takes __m64
+      __m128i gtnum = _mm_set1_epi64x(*gt);
+      __m128i gtres = _mm_cmpgt_epi64(loaded, gtnum);
+      mask = _mm_and_si128(mask, gtres);
+    }
+    if (lt != NULL) {
+      __m128i ltnum = _mm_set1_epi64x(*lt);
+      // there is no _mm_cmplt_epi64: loaded < lt is computed as lt > loaded
+      __m128i ltres = _mm_cmpgt_epi64(ltnum, loaded);
+      mask = _mm_and_si128(mask, ltres);
+    }
+    if (eq != NULL) {
+      __m128i eqnum = _mm_set1_epi64x(*eq);
+      __m128i eqres = _mm_cmpeq_epi64(loaded, eqnum);
+      mask = _mm_and_si128(mask, eqres);
+    }
+    __m128i final_result = _mm_and_si128(mask, loaded);
+    // Classify by the comparison mask rather than the masked data: only the
+    // mask is guaranteed to hold whole lanes of identical bits.
+    int mask_result = _mm_movemask_epi8(mask);
+    size_t to_advance = 0;
+    switch (mask_result) {
+      case 0xFFFF:
+      case 0x00FF:
+      case 0x0:
+        // nothing to move within this chunk
+        to_advance = 16;
+        break;
+      case 0xFF00:
+        // only the upper lane survived: move dwords 2,3 down to 0,1
+        final_result = _mm_shuffle_epi32(final_result, 0xe);
+        to_advance = 8;
+        break;
+      default:
+        fprintf(stderr, "Got impossible mask value: 0x%x, aborting ...\n", mask_result);
+        abort();
+    }
+    _mm_storeu_si128((__m128i*)ptr, final_result);
+    ptr += to_advance;
+  }
+#else
+  const unsigned char* reader = vec->data;
+  const unsigned char* read_end = reader + vec->size;
+  void* new_buf = calloc(1, vec->size);
+  if (new_buf == NULL && vec->size != 0) {
+    // allocation failed: keep the original buffer and report it
+    return 0;
+  }
+  unsigned char* writer = new_buf;
+  while (reader < read_end) {
+    if (*reader) {
+      *writer++ = *reader;
+    }
+    ++reader;
+  }
+  free(vec->data);
+  vec->data = new_buf;
+#endif
+  return 1;
+}
+
 #endif // PYSIMD_VEC_FILTER_H
diff --git a/setup.py b/setup.py index 0954326..cc288cd 100644 --- a/setup.py +++ b/setup.py @@ -10,8 +10,8 @@ # on vector object without needing to check the length/size of it pysimd_minimum_align = 8 -pysimd_patch_version = 4 -pysimd_minor_version = 0 +pysimd_patch_version = 0 +pysimd_minor_version = 1 pysimd_major_version = 0 pysimd_version = [pysimd_major_version, diff --git a/src/pymain.c b/src/pymain.c index 4a7df2c..22df339 100644 --- a/src/pymain.c +++ b/src/pymain.c @@ -1,7 +1,7 @@ #include "core_simd_info.h" #include "simd_vec.h" #include "simd_vec_arith.h" -#include "simd_vec_filter.h" +//#include "simd_vec_filter.h" #define PY_SSIZE_T_CLEAN #include <Python.h> #include "structmember.h"