Skip to content

Commit

Permalink
Merge pull request #19017 from ghalliday/issue32468
Browse files Browse the repository at this point in the history
HPCC-32468 Optimize MemoryBuffer::read(int/short/int64)

Reviewed-by: Mark Kelly [email protected]
Reviewed-by: Jake Smith <[email protected]>
Merged-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday authored Aug 23, 2024
2 parents 7e9a9ec + bb52776 commit acae664
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 28 deletions.
34 changes: 15 additions & 19 deletions system/jlib/jbuff.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,15 @@ MemoryBuffer &MemoryBuffer::appendFile(const char *fileName)
return *this;
}

#define DO_READ_ENDIAN(len, value) \
CHECKREADPOS(len); \
if (swapEndian) \
doCopyRev<len>(value, buffer + readPos); \
else \
memcpy_iflen(value, buffer + readPos, len); \
readPos += len; \
return *this;

MemoryBuffer & MemoryBuffer::read(char & value)
{
CHECKREADPOS(sizeof(value));
Expand Down Expand Up @@ -761,44 +770,32 @@ MemoryBuffer & MemoryBuffer::read(float & value)

MemoryBuffer & MemoryBuffer::read(short & value)
{
return readEndian(sizeof(value), &value);
DO_READ_ENDIAN(sizeof(value), &value);
}

MemoryBuffer & MemoryBuffer::read(unsigned short & value)
{
return readEndian(sizeof(value), &value);
DO_READ_ENDIAN(sizeof(value), &value);
}

MemoryBuffer & MemoryBuffer::read(int & value)
{
return readEndian(sizeof(value), &value);
DO_READ_ENDIAN(sizeof(value), &value);
}

MemoryBuffer & MemoryBuffer::read(unsigned & value)
{
return readEndian(sizeof(value), &value);
DO_READ_ENDIAN(sizeof(value), &value);
}

#if 0
MemoryBuffer & MemoryBuffer::read(unsigned long & value)
{
return readEndian(sizeof(value), &value);
}

MemoryBuffer & MemoryBuffer::read(long & value)
{
return readEndian(sizeof(value), &value);
}
#endif

MemoryBuffer & MemoryBuffer::read(unsigned __int64 & value)
{
return readEndian(sizeof(value), &value);
DO_READ_ENDIAN(sizeof(value), &value);
}

MemoryBuffer & MemoryBuffer::read(__int64 & value)
{
return readEndian(sizeof(value), &value);
DO_READ_ENDIAN(sizeof(value), &value);
}

const byte * MemoryBuffer::readDirect(size32_t len)
Expand Down Expand Up @@ -861,7 +858,6 @@ void MemoryBuffer::writeEndianDirect(size32_t pos,size32_t len,const void *buf)
memcpy_iflen(buffer+pos,buf,len);
}


MemoryBuffer & MemoryBuffer::readEndian(size32_t len, void * value)
{
CHECKREADPOS(len);
Expand Down
1 change: 0 additions & 1 deletion system/jlib/jbuff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ class jlib_decl MemoryBuffer
inline const char * toByteArray() const { return curLen ? buffer : nullptr; }
inline const byte * bytes() const { return curLen ? (const byte *)buffer : nullptr; }


private:
MemoryBuffer & read(unsigned long & value); // unimplemented
MemoryBuffer & read(long & value); // unimplemented
Expand Down
57 changes: 49 additions & 8 deletions system/jlib/jmisc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,28 @@ extern jlib_decl void _rev(size32_t len, void * ptr);
#endif

inline void _cpyrev2(void * _tgt, const void * _src) {
char * tgt = (char *)_tgt; const char * src = (const char *)_src;
tgt[1]=src[0]; tgt[0] = src[1];
//Technically undefined behaviour because the _src is likely to be a byte stream
//but this will work on all known architectures
unsigned value = *(const unsigned short *)_src;
//NOTE: Optimized by the compiler
value = ((value & 0xFF00) >> 8) |
((value & 0x00FF) << 8);
*(unsigned short *)_tgt = value;
}
inline void _cpyrev3(void * _tgt, const void * _src) {
char * tgt = (char *)_tgt; const char * src = (const char *)_src;
tgt[2] = src[0]; tgt[1]=src[1]; tgt[0] = src[2];
}
inline void _cpyrev4(void * _tgt, const void * _src) {
char * tgt = (char *)_tgt; const char * src = (const char *)_src;
tgt[3]=src[0]; tgt[2] = src[1]; tgt[1]=src[2]; tgt[0] = src[3];
inline void _cpyrev4(void * _tgt, const void * _src) {
//Technically undefined behaviour because the _src is likely to be a byte stream
//but this will work on all known architectures
unsigned value = *(const unsigned *)_src;
//NOTE: The compiler spots this pattern an optimizes it into a byte-swap operation
value = ((value & 0xFF000000) >> 24) |
((value & 0x00FF0000) >> 8) |
((value & 0x0000FF00) << 8) |
((value & 0x000000FF) << 24);
*(unsigned *)_tgt = value;
}
inline void _cpyrev5(void * _tgt, const void * _src) {
char * tgt = (char *)_tgt; const char * src = (const char *)_src;
Expand All @@ -147,9 +159,19 @@ inline void _cpyrev7(void * _tgt, const void * _src) {
tgt[3] = src[3]; tgt[2]=src[4]; tgt[1]=src[5]; tgt[0]=src[6];
}
inline void _cpyrev8(void * _tgt, const void * _src) {
char * tgt = (char *)_tgt; const char * src = (const char *)_src;
tgt[7]=src[0]; tgt[6] = src[1]; tgt[5]=src[2]; tgt[4] = src[3];
tgt[3]=src[4]; tgt[2] = src[5]; tgt[1]=src[6]; tgt[0] = src[7];
//Technically undefined behaviour because the _src is likely to be a byte stream
//but this will work on all known architectures
unsigned __int64 value = *(const unsigned __int64 *)_src;
//NOTE: The compiler spots this pattern an optimizes it into a byte-swap operation
value = ((value & 0xFF00000000000000ULL) >> 56) |
((value & 0x00FF000000000000ULL) >> 40) |
((value & 0x0000FF0000000000ULL) >> 24) |
((value & 0x000000FF00000000ULL) >> 8) |
((value & 0x00000000FF000000ULL) << 8) |
((value & 0x0000000000FF0000ULL) << 24) |
((value & 0x000000000000FF00ULL) << 40) |
((value & 0x00000000000000FFULL) << 56);
*(unsigned __int64 *)_tgt = value;
}
inline void _cpyrevn(void * _tgt, const void * _src, unsigned len) {
char * tgt = (char *)_tgt; const char * src = (const char *)_src+len;
Expand All @@ -158,6 +180,25 @@ inline void _cpyrevn(void * _tgt, const void * _src, unsigned len) {
}
}

//Define a template class to allow the common byte reversal operations to be optimized
template <unsigned LEN>
inline void doCopyRev(void * tgt, const void * src) {
_cpyrevn(tgt, src, LEN);
}

template <>
inline void doCopyRev<2>(void * tgt, const void * src) {
_cpyrev2(tgt, src);
}
template <>
inline void doCopyRev<4>(void * tgt, const void * src) {
_cpyrev4(tgt, src);
}
template <>
inline void doCopyRev<8>(void * tgt, const void * src) {
_cpyrev8(tgt, src);
}

#if __BYTE_ORDER == __LITTLE_ENDIAN
#define _WINCPYREV(x, y, len) _cpyrevn(x, y, len)
#define _WINCPYREV2(x, y) _cpyrev2(x, y)
Expand Down

0 comments on commit acae664

Please sign in to comment.