Skip to content

Commit

Permalink
feat: added rswrapper thanks to cgutman
Browse files Browse the repository at this point in the history
  • Loading branch information
ABeltramo committed Aug 12, 2024
1 parent 37711e1 commit f76307b
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 14 deletions.
18 changes: 10 additions & 8 deletions src/moonlight-protocol/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,20 @@ FetchContent_GetProperties(nanors)
# Runs only while nanors_POPULATED is false: populates the fetched nanors sources, declares the
# `nanors` library plus its `nanors::nanors` alias, and passes that alias to `moonlight` through
# target_link_libraries_system (a helper that is not defined in this fragment).
#
# NOTE(review): this block shows two add_library(nanors ...) calls, two
# target_include_directories(...) calls, two PRIVATE/PUBLIC source pairs and two
# set_source_files_properties(...) calls. It reads like the before and after sides of a diff
# rendered together (plain rs.c build vs. the rswrapper.c build). Confirm which set is live:
# CMake rejects a second add_library() for a target name that already exists.
if (NOT nanors_POPULATED)
FetchContent_Populate(nanors)

add_library(nanors STATIC ${nanors_SOURCE_DIR}/rs.c)
add_library(nanors)
add_library(nanors::nanors ALIAS nanors)
target_include_directories(nanors PUBLIC ${nanors_SOURCE_DIR} ${nanors_SOURCE_DIR}/deps/obl/)
# ./nanors holds rswrapper.c / rswrapper.h; the two ${nanors_SOURCE_DIR} entries are the fetched
# nanors tree and its deps/obl/ subdirectory.
target_include_directories(nanors
PUBLIC
./nanors
${nanors_SOURCE_DIR}
${nanors_SOURCE_DIR}/deps/obl/)

# rswrapper.c pulls in rs.c itself via #include (once per ISA variant), so in the wrapper setup
# rs.c is not meant to be listed as a separate source.
target_sources(nanors
PRIVATE ${nanors_SOURCE_DIR}/rs.c
PUBLIC ${nanors_SOURCE_DIR}/rs.h)
PRIVATE ./nanors/rswrapper.c
PUBLIC ./nanors/rswrapper.h)

# TODO: There's a more advanced version of this with proper support for SSSE3,
# see: https://github.com/LizardByte/Sunshine/pull/2828
set_source_files_properties(${nanors_SOURCE_DIR}/rs.c
PROPERTIES COMPILE_FLAGS "-include deps/obl/autoshim.h -ftree-vectorize")
# These flags are attached to the wrapper source file only, not to the whole target.
set_source_files_properties(./nanors/rswrapper.c
PROPERTIES COMPILE_FLAGS "-ftree-vectorize -funroll-loops")

target_link_libraries_system(moonlight PUBLIC nanors::nanors)
endif ()
Expand Down
12 changes: 6 additions & 6 deletions src/moonlight-protocol/moonlight/fec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <memory>

extern "C" {
#include <rs.h>
#include "rswrapper.h"
}

/**
Expand Down Expand Up @@ -32,7 +32,7 @@ inline void init() {
/**
* Owning smart pointer to a reed_solomon structure. The release function is supplied as the
* deleter where the pointer is built (see create()), so the structure is freed automatically
* once it is no longer owned.
*
* NOTE(review): two alias declarations for rs_ptr follow (a unique_ptr carrying a
* reed_solomon_release deleter type, and a plain shared_ptr). They look like the before and after
* sides of a diff; only one of them can be in effect. Confirm which.
*/
using rs_ptr = std::unique_ptr<reed_solomon, decltype(&reed_solomon_release)>;
using rs_ptr = std::shared_ptr<reed_solomon>;

/**
* Creates and allocates the required Reed Solomon data structure.
Expand All @@ -43,8 +43,8 @@ using rs_ptr = std::unique_ptr<reed_solomon, decltype(&reed_solomon_release)>;
* @return A smart pointer, it will release the memory when going out of scope
*/
inline rs_ptr create(int data_shards, int parity_shards) {
// Allocates a reed_solomon structure for the given shard counts and wraps it in rs_ptr with the
// matching release function as deleter.
//
// reed_solomon_new_fn / reed_solomon_release_fn are global function pointers declared in
// rswrapper.h; rswrapper.c only assigns them inside reed_solomon_init(). That call must have
// happened before create() is used (presumably through init() in this header — confirm).
//
// NOTE(review): two allocate/return pairs are shown below (direct nanors calls vs. the *_fn
// pointers). They look like the before and after sides of a diff; confirm which pair is live.
auto rs = reed_solomon_new(data_shards, parity_shards);
return {rs, ::reed_solomon_release};
auto rs = reed_solomon_new_fn(data_shards, parity_shards);
return std::shared_ptr<reed_solomon>(rs, reed_solomon_release_fn);
}

/**
Expand All @@ -63,7 +63,7 @@ inline rs_ptr create(int data_shards, int parity_shards) {
* @return zero on success or an error code if failing.
*/
inline int encode(reed_solomon *rs, uint8_t **shards, int nr_shards, int block_size) {
// Thin forwarder: all four arguments are passed through unchanged and the callee's int result
// is returned as-is.
// NOTE(review): two return statements are shown (direct reed_solomon_encode vs. the
// reed_solomon_encode_fn pointer declared in rswrapper.h). They look like the before and after
// sides of a diff; as written, the second one is unreachable. Confirm which is live.
return reed_solomon_encode(rs, shards, nr_shards, block_size);
return reed_solomon_encode_fn(rs, shards, nr_shards, block_size);
}

/**
Expand All @@ -82,7 +82,7 @@ inline int encode(reed_solomon *rs, uint8_t **shards, int nr_shards, int block_s
* @return zero on success or an error code if failing
*/
inline int decode(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int block_size) {
// Thin forwarder: all five arguments are passed through unchanged and the callee's int result
// is returned as-is.
// NOTE(review): two return statements are shown (direct reed_solomon_decode vs. the
// reed_solomon_decode_fn pointer declared in rswrapper.h). They look like the before and after
// sides of a diff; as written, the second one is unreachable. Confirm which is live.
return reed_solomon_decode(rs, shards, marks, nr_shards, block_size);
return reed_solomon_decode_fn(rs, shards, marks, nr_shards, block_size);
}

} // namespace moonlight::fec
153 changes: 153 additions & 0 deletions src/moonlight-protocol/nanors/rswrapper.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/**
* @file src/moonlight-protocol/nanors/rswrapper.c
* @brief Wrappers for nanors vectorization with different ISA options
*/

// _FORTIFY_SOURCE can cause some versions of GCC to try to inline
// memset() with incompatible target options when compiling rs.c
// It is therefore removed for this whole translation unit, before rs.c is included below.
#ifdef _FORTIFY_SOURCE
#undef _FORTIFY_SOURCE
#endif

// The assert() function is decorated with __cold on macOS which
// is incompatible with Clang's target multiversioning feature
// NDEBUG is forced on here, ahead of every include of rs.c, so that assert() expands to nothing
// in the code compiled below.
#ifndef NDEBUG
#define NDEBUG
#endif

// Token-pasting helpers. DECORATE_FUNC goes through DECORATE_FUNC_I so that its arguments
// (in particular ISA_SUFFIX) are macro-expanded before being pasted together with ##.
// ISA_SUFFIX itself is defined further down, immediately before each include of rs.c.
#define DECORATE_FUNC_I(a, b) a##b
#define DECORATE_FUNC(a, b) DECORATE_FUNC_I(a, b)

// Append an ISA suffix to the public RS API
// (each include of rs.c below therefore produces e.g. reed_solomon_new_avx2 instead of reed_solomon_new)
#define reed_solomon_init DECORATE_FUNC(reed_solomon_init, ISA_SUFFIX)
#define reed_solomon_new DECORATE_FUNC(reed_solomon_new, ISA_SUFFIX)
#define reed_solomon_new_static DECORATE_FUNC(reed_solomon_new_static, ISA_SUFFIX)
#define reed_solomon_release DECORATE_FUNC(reed_solomon_release, ISA_SUFFIX)
#define reed_solomon_decode DECORATE_FUNC(reed_solomon_decode, ISA_SUFFIX)
#define reed_solomon_encode DECORATE_FUNC(reed_solomon_encode, ISA_SUFFIX)

// Append an ISA suffix to internal functions to prevent multiple definition errors
// (rs.c is included several times into this one translation unit)
#define obl_axpy_ref DECORATE_FUNC(obl_axpy_ref, ISA_SUFFIX)
#define obl_scal_ref DECORATE_FUNC(obl_scal_ref, ISA_SUFFIX)
#define obl_axpyb32_ref DECORATE_FUNC(obl_axpyb32_ref, ISA_SUFFIX)
#define obl_axpy DECORATE_FUNC(obl_axpy, ISA_SUFFIX)
#define obl_scal DECORATE_FUNC(obl_scal, ISA_SUFFIX)
#define obl_swap DECORATE_FUNC(obl_swap, ISA_SUFFIX)
#define obl_axpyb32 DECORATE_FUNC(obl_axpyb32, ISA_SUFFIX)
#define axpy DECORATE_FUNC(axpy, ISA_SUFFIX)
#define scal DECORATE_FUNC(scal, ISA_SUFFIX)
#define gemm DECORATE_FUNC(gemm, ISA_SUFFIX)
#define invert_mat DECORATE_FUNC(invert_mat, ISA_SUFFIX)

// x86 only: rs.c is included three times below, once per vector extension. Every section follows
// the same recipe:
//   1. push a function-level target attribute (Clang) or target option (GCC) for the extension,
//   2. define ISA_SUFFIX (picked up by the renaming macros above) and the matching OBLAS_* macro,
//   3. include rs.c,
//   4. undefine both macros and pop the target setting again.
// The resulting *_ssse3 / *_avx2 / *_avx512 functions are selected at run time in
// reed_solomon_init().
#if defined(__x86_64__) || defined(__i386__)

// Compile a variant for SSSE3
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("ssse3"))), apply_to = function)
#else
#pragma GCC push_options
#pragma GCC target("ssse3")
#endif
#define ISA_SUFFIX _ssse3
#define OBLAS_SSE3
#include "./rs.c"
#undef OBLAS_SSE3
#undef ISA_SUFFIX
#if defined(__clang__)
#pragma clang attribute pop
#else
#pragma GCC pop_options
#endif

// Compile a variant for AVX2
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("avx2"))), apply_to = function)
#else
#pragma GCC push_options
#pragma GCC target("avx2")
#endif
#define ISA_SUFFIX _avx2
#define OBLAS_AVX2
#include "./rs.c"
#undef OBLAS_AVX2
#undef ISA_SUFFIX
#if defined(__clang__)
#pragma clang attribute pop
#else
#pragma GCC pop_options
#endif

// Compile a variant for AVX512BW
// (both avx512f and avx512bw are enabled, matching the two run-time checks in reed_solomon_init())
#if defined(__clang__)
#pragma clang attribute push(__attribute__((target("avx512f,avx512bw"))), apply_to = function)
#else
#pragma GCC push_options
#pragma GCC target("avx512f,avx512bw")
#endif
#define ISA_SUFFIX _avx512
#define OBLAS_AVX512
#include "./rs.c"
#undef OBLAS_AVX512
#undef ISA_SUFFIX
#if defined(__clang__)
#pragma clang attribute pop
#else
#pragma GCC pop_options
#endif

#endif

// Compile a default variant
// This one is built on every architecture, with no target pragma and no OBLAS_* macro defined
// here. autoshim.h is included for this variant only.
// NOTE(review): presumably autoshim.h chooses an implementation from the compiler's baseline
// flags — confirm against the nanors sources.
#define ISA_SUFFIX _def
#include "./autoshim.h"
#include "./rs.c"
#undef ISA_SUFFIX

// Drop the renaming macros for the public API so that the plain names can be used again below
// (reed_solomon_init is defined under its real name at the end of this file). The renaming
// macros for the internal obl_* / axpy / scal / gemm / invert_mat helpers stay defined.
#undef reed_solomon_init
#undef reed_solomon_new
#undef reed_solomon_new_static
#undef reed_solomon_release
#undef reed_solomon_decode
#undef reed_solomon_encode

#include "rswrapper.h"

// Definitions of the function pointers declared extern in rswrapper.h.
// They have no initializer, so they stay null until reed_solomon_init() assigns them.
reed_solomon_new_t reed_solomon_new_fn;
reed_solomon_release_t reed_solomon_release_fn;
reed_solomon_encode_t reed_solomon_encode_fn;
reed_solomon_decode_t reed_solomon_decode_fn;

/**
 * @brief Bind the global RS function pointers to the best variant the running CPU supports.
 * @details On x86 the CPU is probed for AVX512 (F + BW), then AVX2, then SSSE3; the first match
 *          wins. On any other architecture, or when none of those extensions is present, the
 *          default variant is used. After the four pointers are set, the chosen variant's own
 *          init routine is called.
 */
void reed_solomon_init(void) {
#if defined(__x86_64__) || defined(__i386__)
  // Widest extension first: AVX512 requires both the F and BW feature bits.
  if (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512bw")) {
    reed_solomon_new_fn = reed_solomon_new_avx512;
    reed_solomon_release_fn = reed_solomon_release_avx512;
    reed_solomon_encode_fn = reed_solomon_encode_avx512;
    reed_solomon_decode_fn = reed_solomon_decode_avx512;
    reed_solomon_init_avx512();
    return;
  }

  if (__builtin_cpu_supports("avx2")) {
    reed_solomon_new_fn = reed_solomon_new_avx2;
    reed_solomon_release_fn = reed_solomon_release_avx2;
    reed_solomon_encode_fn = reed_solomon_encode_avx2;
    reed_solomon_decode_fn = reed_solomon_decode_avx2;
    reed_solomon_init_avx2();
    return;
  }

  if (__builtin_cpu_supports("ssse3")) {
    reed_solomon_new_fn = reed_solomon_new_ssse3;
    reed_solomon_release_fn = reed_solomon_release_ssse3;
    reed_solomon_encode_fn = reed_solomon_encode_ssse3;
    reed_solomon_decode_fn = reed_solomon_decode_ssse3;
    reed_solomon_init_ssse3();
    return;
  }
#endif

  // Fallback: non-x86 builds, or an x86 CPU with none of the extensions probed above.
  reed_solomon_new_fn = reed_solomon_new_def;
  reed_solomon_release_fn = reed_solomon_release_def;
  reed_solomon_encode_fn = reed_solomon_encode_def;
  reed_solomon_decode_fn = reed_solomon_decode_def;
  reed_solomon_init_def();
}
21 changes: 21 additions & 0 deletions src/moonlight-protocol/nanors/rswrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
* @file src/moonlight-protocol/nanors/rswrapper.h
* @brief Wrappers for nanors vectorization
* @details This is a drop-in replacement for nanors rs.h
*/
#pragma once

#include "rs.h"
#include <stdint.h>

// Handle type for the codec state; callers only ever pass it around by pointer.
// NOTE(review): rs.h is included above and presumably provides this typedef as well — confirm
// that the repeated declaration is intentional.
typedef struct _reed_solomon reed_solomon;

// Function pointer types, one per operation exposed through the wrapper:
// allocate a codec for the given shard counts, release a codec, encode shards, and decode shards
// (decode additionally takes a `marks` array).
typedef reed_solomon *(*reed_solomon_new_t)(int data_shards, int parity_shards);
typedef void (*reed_solomon_release_t)(reed_solomon *rs);
typedef int (*reed_solomon_encode_t)(reed_solomon *rs, uint8_t **shards, int nr_shards, int bs);
typedef int (*reed_solomon_decode_t)(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int bs);

// Global dispatch pointers, defined in rswrapper.c. reed_solomon_init() assigns them to the
// variant chosen for the running CPU; they must not be called before that.
extern reed_solomon_new_t reed_solomon_new_fn;
extern reed_solomon_release_t reed_solomon_release_fn;
extern reed_solomon_encode_t reed_solomon_encode_fn;
extern reed_solomon_decode_t reed_solomon_decode_fn;

0 comments on commit f76307b

Please sign in to comment.