-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: added rswrapper thanks to cgutman
- Loading branch information
Showing
4 changed files
with
190 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
/** | ||
* @file src/rswrapper.c | ||
* @brief Wrappers for nanors vectorization with different ISA options | ||
*/ | ||
|
||
// _FORTIFY_SOURCE can cause some versions of GCC to try to inline | ||
// memset() with incompatible target options when compiling rs.c | ||
#ifdef _FORTIFY_SOURCE | ||
#undef _FORTIFY_SOURCE | ||
#endif | ||
|
||
// The assert() function is decorated with __cold on macOS which | ||
// is incompatible with Clang's target multiversioning feature | ||
// With NDEBUG defined here, any <assert.h> included after this point | ||
// expands assert() to nothing, so the problematic call never appears. | ||
#ifndef NDEBUG | ||
#define NDEBUG | ||
#endif | ||
|
||
// Two-level token pasting: DECORATE_FUNC macro-expands its arguments first | ||
// (so ISA_SUFFIX is replaced by its current value), then DECORATE_FUNC_I | ||
// concatenates the two results with ##. | ||
#define DECORATE_FUNC_I(a, b) a##b | ||
#define DECORATE_FUNC(a, b) DECORATE_FUNC_I(a, b) | ||
|
||
// Append an ISA suffix to the public RS API | ||
// These macros are expanded at each use, so every inclusion of rs.c below | ||
// picks up whatever ISA_SUFFIX is defined as at that moment. | ||
// They are removed again with #undef once all variants have been compiled. | ||
#define reed_solomon_init DECORATE_FUNC(reed_solomon_init, ISA_SUFFIX) | ||
#define reed_solomon_new DECORATE_FUNC(reed_solomon_new, ISA_SUFFIX) | ||
#define reed_solomon_new_static DECORATE_FUNC(reed_solomon_new_static, ISA_SUFFIX) | ||
#define reed_solomon_release DECORATE_FUNC(reed_solomon_release, ISA_SUFFIX) | ||
#define reed_solomon_decode DECORATE_FUNC(reed_solomon_decode, ISA_SUFFIX) | ||
#define reed_solomon_encode DECORATE_FUNC(reed_solomon_encode, ISA_SUFFIX) | ||
|
||
// Append an ISA suffix to internal functions to prevent multiple definition errors | ||
// rs.c is included several times in this translation unit, so each pass must | ||
// produce distinct names for these helpers. | ||
// NOTE(review): the list is assumed to match the helpers that rs.c defines; | ||
// confirm against rs.c whenever it is updated. | ||
#define obl_axpy_ref DECORATE_FUNC(obl_axpy_ref, ISA_SUFFIX) | ||
#define obl_scal_ref DECORATE_FUNC(obl_scal_ref, ISA_SUFFIX) | ||
#define obl_axpyb32_ref DECORATE_FUNC(obl_axpyb32_ref, ISA_SUFFIX) | ||
#define obl_axpy DECORATE_FUNC(obl_axpy, ISA_SUFFIX) | ||
#define obl_scal DECORATE_FUNC(obl_scal, ISA_SUFFIX) | ||
#define obl_swap DECORATE_FUNC(obl_swap, ISA_SUFFIX) | ||
#define obl_axpyb32 DECORATE_FUNC(obl_axpyb32, ISA_SUFFIX) | ||
#define axpy DECORATE_FUNC(axpy, ISA_SUFFIX) | ||
#define scal DECORATE_FUNC(scal, ISA_SUFFIX) | ||
#define gemm DECORATE_FUNC(gemm, ISA_SUFFIX) | ||
#define invert_mat DECORATE_FUNC(invert_mat, ISA_SUFFIX) | ||
|
||
#if defined(__x86_64__) || defined(__i386__) | ||
|
||
// Compile a variant for SSSE3 | ||
// The target attribute (Clang) or target pragma (other compilers) is pushed | ||
// only around this inclusion of rs.c and popped again afterwards. | ||
// With ISA_SUFFIX set to _ssse3, the renamed symbols from this pass end in _ssse3. | ||
// NOTE(review): OBLAS_SSE3 is presumably the switch rs.c tests to select its | ||
// SSSE3 code path - confirm against rs.c. | ||
#if defined(__clang__) | ||
#pragma clang attribute push(__attribute__((target("ssse3"))), apply_to = function) | ||
#else | ||
#pragma GCC push_options | ||
#pragma GCC target("ssse3") | ||
#endif | ||
#define ISA_SUFFIX _ssse3 | ||
#define OBLAS_SSE3 | ||
#include "./rs.c" | ||
#undef OBLAS_SSE3 | ||
#undef ISA_SUFFIX | ||
#if defined(__clang__) | ||
#pragma clang attribute pop | ||
#else | ||
#pragma GCC pop_options | ||
#endif | ||
|
||
// Compile a variant for AVX2 | ||
// The target attribute (Clang) or target pragma (other compilers) is pushed | ||
// only around this inclusion of rs.c and popped again afterwards. | ||
// With ISA_SUFFIX set to _avx2, the renamed symbols from this pass end in _avx2. | ||
// NOTE(review): OBLAS_AVX2 is presumably the switch rs.c tests to select its | ||
// AVX2 code path - confirm against rs.c. | ||
#if defined(__clang__) | ||
#pragma clang attribute push(__attribute__((target("avx2"))), apply_to = function) | ||
#else | ||
#pragma GCC push_options | ||
#pragma GCC target("avx2") | ||
#endif | ||
#define ISA_SUFFIX _avx2 | ||
#define OBLAS_AVX2 | ||
#include "./rs.c" | ||
#undef OBLAS_AVX2 | ||
#undef ISA_SUFFIX | ||
#if defined(__clang__) | ||
#pragma clang attribute pop | ||
#else | ||
#pragma GCC pop_options | ||
#endif | ||
|
||
// Compile a variant for AVX512BW | ||
// Both avx512f and avx512bw are enabled for this pass; the runtime dispatcher | ||
// in reed_solomon_init() checks for the same two features before using it. | ||
// With ISA_SUFFIX set to _avx512, the renamed symbols from this pass end in _avx512. | ||
// NOTE(review): OBLAS_AVX512 is presumably the switch rs.c tests to select its | ||
// AVX-512 code path - confirm against rs.c. | ||
#if defined(__clang__) | ||
#pragma clang attribute push(__attribute__((target("avx512f,avx512bw"))), apply_to = function) | ||
#else | ||
#pragma GCC push_options | ||
#pragma GCC target("avx512f,avx512bw") | ||
#endif | ||
#define ISA_SUFFIX _avx512 | ||
#define OBLAS_AVX512 | ||
#include "./rs.c" | ||
#undef OBLAS_AVX512 | ||
#undef ISA_SUFFIX | ||
#if defined(__clang__) | ||
#pragma clang attribute pop | ||
#else | ||
#pragma GCC pop_options | ||
#endif | ||
|
||
#endif | ||
|
||
// Compile a default variant | ||
// No target pragma and no OBLAS_* macro is set for this pass, and it sits | ||
// outside the x86-only #if, so it is built on every architecture. | ||
// NOTE(review): autoshim.h is included only for this pass; its contents are | ||
// not visible here - confirm what it provides to rs.c. | ||
#define ISA_SUFFIX _def | ||
#include "./autoshim.h" | ||
#include "./rs.c" | ||
#undef ISA_SUFFIX | ||
|
||
// Drop the public-API renaming macros now that every variant is compiled, | ||
// so the declarations in rswrapper.h and the reed_solomon_init() defined | ||
// below keep their plain, unsuffixed names. | ||
#undef reed_solomon_init | ||
#undef reed_solomon_new | ||
#undef reed_solomon_new_static | ||
#undef reed_solomon_release | ||
#undef reed_solomon_decode | ||
#undef reed_solomon_encode | ||
|
||
#include "rswrapper.h" | ||
|
||
// Dispatch pointers for the selected RS implementation. | ||
// They have no initializer, so they are null until reed_solomon_init() | ||
// assigns them. | ||
reed_solomon_new_t reed_solomon_new_fn; | ||
reed_solomon_release_t reed_solomon_release_fn; | ||
reed_solomon_encode_t reed_solomon_encode_fn; | ||
reed_solomon_decode_t reed_solomon_decode_fn; | ||
|
||
/** | ||
* @brief This initializes the RS function pointers to the best vectorized version available. | ||
* @details The streaming code will directly invoke these function pointers during encoding. | ||
* | ||
* On x86 builds the CPU is probed with __builtin_cpu_supports() in the order | ||
* AVX-512 (F and BW), AVX2, SSSE3; the first match wins. If none match, or on | ||
* any other architecture, the default variant is used. | ||
* | ||
* For the chosen variant, the four *_fn pointers are assigned and that | ||
* variant's own reed_solomon_init_<suffix>() is called. | ||
*/ | ||
void reed_solomon_init(void) { | ||
#if defined(__x86_64__) || defined(__i386__) | ||
// Widest ISA first: AVX-512 needs both the F and BW feature bits. | ||
if (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512bw")) { | ||
reed_solomon_new_fn = reed_solomon_new_avx512; | ||
reed_solomon_release_fn = reed_solomon_release_avx512; | ||
reed_solomon_encode_fn = reed_solomon_encode_avx512; | ||
reed_solomon_decode_fn = reed_solomon_decode_avx512; | ||
reed_solomon_init_avx512(); | ||
} else if (__builtin_cpu_supports("avx2")) { | ||
reed_solomon_new_fn = reed_solomon_new_avx2; | ||
reed_solomon_release_fn = reed_solomon_release_avx2; | ||
reed_solomon_encode_fn = reed_solomon_encode_avx2; | ||
reed_solomon_decode_fn = reed_solomon_decode_avx2; | ||
reed_solomon_init_avx2(); | ||
} else if (__builtin_cpu_supports("ssse3")) { | ||
reed_solomon_new_fn = reed_solomon_new_ssse3; | ||
reed_solomon_release_fn = reed_solomon_release_ssse3; | ||
reed_solomon_encode_fn = reed_solomon_encode_ssse3; | ||
reed_solomon_decode_fn = reed_solomon_decode_ssse3; | ||
reed_solomon_init_ssse3(); | ||
} else | ||
#endif | ||
// On x86 the block below is the body of the trailing else above; | ||
// on other architectures it is the only code in this function. | ||
{ | ||
reed_solomon_new_fn = reed_solomon_new_def; | ||
reed_solomon_release_fn = reed_solomon_release_def; | ||
reed_solomon_encode_fn = reed_solomon_encode_def; | ||
reed_solomon_decode_fn = reed_solomon_decode_def; | ||
reed_solomon_init_def(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
/** | ||
* @file src/rswrapper.h | ||
* @brief Wrappers for nanors vectorization | ||
* @details This is a drop-in replacement for nanors rs.h | ||
*/ | ||
#pragma once | ||
|
||
#include "rs.h" | ||
#include <stdint.h> | ||
|
||
// Codec handle type. The declarations in this header only ever use it | ||
// through a pointer and never look inside the struct. | ||
typedef struct _reed_solomon reed_solomon; | ||
|
||
// Function-pointer types for the RS entry points that are dispatched at runtime. | ||
// NOTE(review): parameter meanings and return-value conventions come from the | ||
// underlying rs.c implementation and are not visible here - confirm there. | ||
typedef reed_solomon *(*reed_solomon_new_t)(int data_shards, int parity_shards); | ||
typedef void (*reed_solomon_release_t)(reed_solomon *rs); | ||
typedef int (*reed_solomon_encode_t)(reed_solomon *rs, uint8_t **shards, int nr_shards, int bs); | ||
typedef int (*reed_solomon_decode_t)(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int bs); | ||
|
||
// Dispatch pointers defined in rswrapper.c and assigned by reed_solomon_init(). | ||
// Call reed_solomon_init() before using any of them. | ||
extern reed_solomon_new_t reed_solomon_new_fn; | ||
extern reed_solomon_release_t reed_solomon_release_fn; | ||
extern reed_solomon_encode_t reed_solomon_encode_fn; | ||
extern reed_solomon_decode_t reed_solomon_decode_fn; |