Skip to content

Commit

Permalink
[librpcpu] Improve ifunc dispatch functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ccawley2011 committed Apr 10, 2020
1 parent 390e8f1 commit e39cc2a
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 200 deletions.
47 changes: 26 additions & 21 deletions src/gtk/GdkImageConv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ namespace LibRpTexture {

#if defined(RP_CPU_I386) || defined(RP_CPU_AMD64)
# include "librpcpu/cpuflags_x86.h"
# ifdef RP_HAS_IFUNC
# define GDKIMAGECONV_HAS_IFUNC 1
# endif
# define GDKIMAGECONV_HAS_SSSE3 1
#endif

Expand Down Expand Up @@ -51,36 +54,38 @@ class GdkImageConv
static GdkPixbuf *rp_image_to_GdkPixbuf_ssse3(const LibRpTexture::rp_image *img);
#endif /* GDKIMAGECONV_HAS_SSSE3 */

#ifdef GDKIMAGECONV_HAS_IFUNC
/* System has IFUNC. Use it for dispatching. */

/**
* Convert an rp_image to GdkPixbuf.
* @param img [in] rp_image.
* @return GdkPixbuf, or nullptr on error.
*/
static IFUNC_INLINE GdkPixbuf *rp_image_to_GdkPixbuf(const LibRpTexture::rp_image *img);
};

#if !defined(RP_HAS_IFUNC) || (!defined(RP_CPU_I386) && !defined(RP_CPU_AMD64))
static GdkPixbuf *rp_image_to_GdkPixbuf(const LibRpTexture::rp_image *img);

// System does not support IFUNC, or we don't have optimizations for these CPUs.
// Use standard inline dispatch.
#else
// System does not support IFUNC, or we don't have optimizations for these CPUs.
// Use standard inline dispatch.

/**
* Convert an rp_image to GdkPixbuf.
* @param img rp_image.
* @return GdkPixbuf, or nullptr on error.
*/
inline GdkPixbuf *GdkImageConv::rp_image_to_GdkPixbuf(const LibRpTexture::rp_image *img)
{
/**
* Convert an rp_image to GdkPixbuf.
* @param img [in] rp_image.
* @return GdkPixbuf, or nullptr on error.
*/
static inline GdkPixbuf *rp_image_to_GdkPixbuf(const LibRpTexture::rp_image *img)
{
#ifdef GDKIMAGECONV_HAS_SSSE3
if (RP_CPU_HasSSSE3()) {
return rp_image_to_GdkPixbuf_ssse3(img);
} else
if (RP_CPU_HasSSSE3()) {
return rp_image_to_GdkPixbuf_ssse3(img);
} else
#endif /* GDKIMAGECONV_HAS_SSSE3 */
{
return rp_image_to_GdkPixbuf_cpp(img);
}
}
{
return rp_image_to_GdkPixbuf_cpp(img);
}
}

#endif /* !defined(RP_HAS_IFUNC) || (!defined(RP_CPU_I386) && !defined(RP_CPU_AMD64)) */
#endif
};

#endif /* __ROMPROPERTIES_GTK_GDKIMAGECONV_HPP__ */
84 changes: 36 additions & 48 deletions src/libromdata/utils/SuperMagicDrive.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
#endif
#ifdef RP_CPU_AMD64
# define SMD_ALWAYS_HAS_SSE2 1
#elif defined(RP_CPU_I386)
# ifdef RP_HAS_IFUNC
# define SMD_HAS_IFUNC 1
# endif
#endif

namespace LibRomData {
Expand Down Expand Up @@ -77,70 +81,54 @@ class SuperMagicDrive
// SMD block size.
static const unsigned int SMD_BLOCK_SIZE = 16384;

// TODO: Use gcc target-specific function attributes if available?
// (IFUNC dispatcher, etc.)

#ifdef SMD_HAS_IFUNC
/* System has IFUNC. Use it for dispatching. */

/**
* Decode a Super Magic Drive interleaved block.
* NOTE: Pointers must be 16-byte aligned if using SSE2.
* @param pDest [out] Destination block. (Must be 16 KB.)
* @param pSrc [in] Source block. (Must be 16 KB.)
*/
static IFUNC_SSE2_INLINE void decodeBlock(uint8_t *RESTRICT pDest, const uint8_t *RESTRICT pSrc);
};

// TODO: Use gcc target-specific function attributes if available?
// (IFUNC dispatcher, etc.)

/** Dispatch functions. **/

#if defined(RP_HAS_IFUNC) && defined(SMD_ALWAYS_HAS_SSE2)

// System does support IFUNC, but it's always guaranteed to have SSE2.
// Eliminate the IFUNC dispatch on this system.

/**
* Decode a Super Magic Drive interleaved block.
* NOTE: Pointers must be 16-byte aligned if using SSE2.
* @param pDest [out] Destination block. (Must be 16 KB.)
* @param pSrc [in] Source block. (Must be 16 KB.)
*/
inline void SuperMagicDrive::decodeBlock(uint8_t *RESTRICT pDest, const uint8_t *RESTRICT pSrc)
{
// amd64 always has SSE2.
decodeBlock_sse2(pDest, pSrc);
}

#endif /* defined(RP_HAS_IFUNC) && defined(SMD_ALWAYS_HAS_SSE2) */
static void decodeBlock(uint8_t *RESTRICT pDest, const uint8_t *RESTRICT pSrc);

#if !defined(RP_HAS_IFUNC) || (!defined(RP_CPU_I386) && !defined(RP_CPU_AMD64))
#else
// System does not support IFUNC, or we don't have optimizations for these CPUs.
// Use standard inline dispatch.

/**
* Decode a Super Magic Drive interleaved block.
* NOTE: Pointers must be 16-byte aligned if using SSE2.
* @param pDest [out] Destination block. (Must be 16 KB.)
* @param pSrc [in] Source block. (Must be 16 KB.)
*/
inline void SuperMagicDrive::decodeBlock(uint8_t *RESTRICT pDest, const uint8_t *RESTRICT pSrc)
{
/**
* Decode a Super Magic Drive interleaved block.
* NOTE: Pointers must be 16-byte aligned if using SSE2.
* @param pDest [out] Destination block. (Must be 16 KB.)
* @param pSrc [in] Source block. (Must be 16 KB.)
*/
static inline void decodeBlock(uint8_t *RESTRICT pDest, const uint8_t *RESTRICT pSrc)
{
#ifdef SMD_ALWAYS_HAS_SSE2
// amd64 always has SSE2.
decodeBlock_sse2(pDest, pSrc);
// amd64 always has SSE2.
decodeBlock_sse2(pDest, pSrc);
#else /* SMD_ALWAYS_HAS_SSE2 */
# ifdef SMD_HAS_SSE2
if (RP_CPU_HasSSE2()) {
decodeBlock_sse2(pDest, pSrc);
} else
if (RP_CPU_HasSSE2()) {
decodeBlock_sse2(pDest, pSrc);
} else
# endif /* SMD_HAS_SSE2 */
# ifdef SMD_HAS_MMX
if (RP_CPU_HasMMX()) {
decodeBlock_mmx(pDest, pSrc);
} else
if (RP_CPU_HasMMX()) {
decodeBlock_mmx(pDest, pSrc);
} else
#endif /* SMD_HAS_MMX */
{
decodeBlock_cpp(pDest, pSrc);
}
{
decodeBlock_cpp(pDest, pSrc);
}
#endif /* SMD_ALWAYS_HAS_SSE2 */
}
}

#endif /* !defined(RP_HAS_IFUNC) || (!defined(RP_CPU_I386) && !defined(RP_CPU_AMD64)) */
#endif
};

}

Expand Down
5 changes: 4 additions & 1 deletion src/librpcpu/byteswap.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

#if defined(RP_CPU_I386) || defined(RP_CPU_AMD64)
# include "cpuflags_x86.h"
# ifdef RP_HAS_IFUNC
# define BYTESWAP_HAS_IFUNC 1
# endif
/* MSVC does not support MMX intrinsics in 64-bit builds. */
/* Reference: https://msdn.microsoft.com/en-us/library/08x3t697(v=vs.110).aspx */
/* In addition, amd64 CPUs all support SSE2 as a minimum, */
Expand Down Expand Up @@ -186,7 +189,7 @@ void __byte_swap_16_array_ssse3(uint16_t *ptr, size_t n);
void __byte_swap_32_array_ssse3(uint32_t *ptr, size_t n);
#endif /* BYTESWAP_HAS_SSSE3 */

#if defined(RP_HAS_IFUNC)
#ifdef BYTESWAP_HAS_IFUNC
/* System has IFUNC. Use it for dispatching. */

/**
Expand Down
14 changes: 0 additions & 14 deletions src/librpcpu/cpu_dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,23 +65,9 @@
#endif

// IFUNC attribute.
// - IFUNC_INLINE / IFUNC_STATIC_INLINE: expand to nothing when RP_HAS_IFUNC
//   is defined; otherwise expand to `inline` / `static inline`.
// - IFUNC_SSE2_INLINE: inline if CPU always has SSE2.
//   (With RP_HAS_IFUNC, only RP_CPU_AMD64 builds get `inline` here;
//   without RP_HAS_IFUNC it is always `inline`.)
// - IFUNC_ATTR(func): with RP_HAS_IFUNC, expands to the ifunc attribute
//   with `func` stringified as its argument; otherwise expands to nothing.
#ifdef RP_HAS_IFUNC
# define IFUNC_INLINE
# define IFUNC_STATIC_INLINE
# ifdef RP_CPU_AMD64
# define IFUNC_SSE2_INLINE inline
# define IFUNC_SSE2_STATIC_INLINE static inline
# else
# define IFUNC_SSE2_INLINE
# define IFUNC_SSE2_STATIC_INLINE
# endif
# define IFUNC_ATTR(func) __attribute__((ifunc(#func)))
#else
# define IFUNC_INLINE inline
# define IFUNC_STATIC_INLINE static inline
# define IFUNC_SSE2_INLINE inline
# define IFUNC_SSE2_STATIC_INLINE static inline
# define IFUNC_ATTR(func)
#endif

Expand Down
Loading

0 comments on commit e39cc2a

Please sign in to comment.