Skip to content

Commit

Permalink
Merge pull request #18940 from richardkchapman/arm64
Browse files Browse the repository at this point in the history
HPCC-32171 ARM64 constant-folding support

Reviewed-by: Gordon Smith <[email protected]>
Reviewed-by: Gavin Halliday <[email protected]>
Merged-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday authored Aug 28, 2024
2 parents 9505b2d + 8650bf3 commit 1284138
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 11 deletions.
5 changes: 5 additions & 0 deletions cmake_modules/vcpkg.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,13 @@ if(WIN32)
set(VCPKG_HOST_TRIPLET "x64-windows" CACHE STRING "host triplet")
set(VCPKG_TARGET_TRIPLET "x64-windows" CACHE STRING "target triplet")
elseif(APPLE)
if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
set(VCPKG_HOST_TRIPLET "arm64-osx" CACHE STRING "host triplet")
set(VCPKG_TARGET_TRIPLET "arm64-osx" CACHE STRING "target triplet")
else()
set(VCPKG_HOST_TRIPLET "x64-osx" CACHE STRING "host triplet")
set(VCPKG_TARGET_TRIPLET "x64-osx" CACHE STRING "target triplet")
endif()
elseif(UNIX)
set(VCPKG_HOST_TRIPLET "x64-linux-dynamic" CACHE STRING "host triplet")
set(VCPKG_TARGET_TRIPLET "x64-linux-dynamic" CACHE STRING "target triplet")
Expand Down
3 changes: 2 additions & 1 deletion common/thorhelper/thorcommon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,9 @@ class BlockedActivityTimer
}
};
#else
struct ActivityTimer
class ActivityTimer
{
public:
inline ActivityTimer(ActivityTimeAccumulator &_accumulator, const bool _enabled) { }
};
struct SimpleActivityTimer
Expand Down
137 changes: 136 additions & 1 deletion ecl/hql/hqlfold.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,142 @@ IValue * doFoldExternalCall(IHqlExpression* expr, unsigned foldOptions, const ch
}
#elif defined(_ARCH_ARM64_)
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
UNIMPLEMENTED;
#ifdef MAXFPREGS
void * floatstack = fstack.getFloatMem();
if (floatstack) {
unsigned * floatSizes = fstack.getFloatSizes();
__asm__ __volatile__ (
".doparm0: \n\t"
"ldr w0,[%[sizes],#0] \n\t"
"cmp w0, #4 \n\t"
"blt 9f \n\t"
"beq 0f \n\t"
"ldr d0,[%[vals], #0] \n\t"
"b 1f \n\t"
"0: \n\t"
"ldr s0,[%[vals], #0] \n\t"
"1: \n\t"
"ldr w0,[%[sizes],#4] \n\t"
"cmp w0, #4 \n\t"
"blt 9f \n\t"
"beq 0f \n\t"
"ldr d1,[%[vals], #8] \n\t"
"b 1f \n\t"
"0: \n\t"
"ldr s1,[%[vals], #8] \n\t"
"1: \n\t"
"ldr w0,[%[sizes],#8] \n\t"
"cmp w0, #4 \n\t"
"blt 9f \n\t"
"beq 0f \n\t"
"ldr d2,[%[vals], #16] \n\t"
"b 1f \n\t"
"0: \n\t"
"ldr s2,[%[vals], #16] \n\t"
"1: \n\t"
"ldr w0,[%[sizes],#12] \n\t"
"cmp w0, #4 \n\t"
"blt 9f \n\t"
"beq 0f \n\t"
"ldr d3,[%[vals], #24] \n\t"
"b 1f \n\t"
"0: \n\t"
"ldr s3,[%[vals], #24] \n\t"
"1: \n\t"
"ldr w0,[%[sizes],#16] \n\t"
"cmp w0, #4 \n\t"
"blt 9f \n\t"
"beq 0f \n\t"
"ldr d4,[%[vals], #32] \n\t"
"b 1f \n\t"
"0: \n\t"
"ldr s4,[%[vals], #32] \n\t"
"1: \n\t"
"ldr w0,[%[sizes],#20] \n\t"
"cmp w0, #4 \n\t"
"blt 9f \n\t"
"beq 0f \n\t"
"ldr d5,[%[vals], #40] \n\t"
"b 1f \n\t"
"0: \n\t"
"ldr s5, [%[vals], #40] \n\t"
"1: \n\t"
"ldr w0,[%[sizes],#24] \n\t"
"cmp w0, #4 \n\t"
"blt 9f \n\t"
"beq 0f \n\t"
"ldr d6,[%[vals], #48] \n\t"
"b 1f \n\t"
"0: \n\t"
"ldr s6,[%[vals], #48] \n\t"
"1: \n\t"
"ldr w0,[%[sizes],#28] \n\t"
"cmp w0, #4 \n\t"
"blt 9f \n\t"
"beq 0f \n\t"
"ldr d7,[%[vals], #56] \n\t"
"b 9f \n\t"
"0: \n\t"
"ldr s7,[%[vals], #56] \n\t"
"9: \n\t"
"nop \n\t"
:
: [vals] "r"(floatstack), [sizes] "r"(floatSizes)
: "r0"
);
}
#endif
assertex((len & 15) == 0); // Stack must always be 16-byte aligned
register unsigned __int64 _int64result asm("x0"); // Specific register for result
register unsigned __int64 _len asm("x1") = len;
register unsigned __int64 _poplen asm("x19") = len-REGPARAMS*REGSIZE; // Needs to survive the call
register void *_fh asm("x8") = fh; // Needs to survive until the call
__asm__ __volatile__ (
"sub sp, sp, %[_len] \n\t" // Make space on stack
"mov x2, sp \n\t" // r2 = destination for loop
".repLoop: \n\t"
"ldrb w3, [%[strbuf]], #1 \n\t" // copy a byte from src array to r3
"strb w3, [x2], #1 \n\t" // and then from r3 onto stack
"subs %[_len], %[_len], #1 \n\t" // decrement and repeat
"bne .repLoop \n\t"
"ldp x0, x1, [sp] \n\t"
"ldp x2, x3, [sp, #16] \n\t"
"ldp x4, x5, [sp, #32] \n\t"
"ldp x6, x7, [sp, #48] \n\t"
"add sp, sp, #64 \n\t" // first 8 parameters go in registers
"blr %[fh] \n\t" // make the call
"add sp, sp, %[_poplen] \n\t" // Restore stack pointer (note, have already popped 8 registers, so poplen is len - 64)
: "=r"(_int64result)
: [_len] "r"(_len), [_poplen] "r"(_poplen), [strbuf] "r"(strbuf), [fh] "r"(_fh)
: "x2","x3","x4","x5","x6","x7","lr" // function we call may corrupt lr
);
int64result = _int64result;
if (isRealvalue)
{
#ifdef MAXFPREGS
if(resultsize <= 4)
{
__asm__ __volatile__(
"str s0,[%[fresult]] \n\t"
:"=m"(floatresult)
: [fresult] "r"(&(floatresult))
);
}
else
{
__asm__ __volatile__(
"str d0,[%[fresult]] \n\t"
:"=m"(doubleresult)
: [fresult] "r"(&(doubleresult))
);
}
#else
if(resultsize <= 4)
floatresult = *(float*)&intresult;
else
doubleresult = *(double*)&intresult;
#endif
}
#else
// Unknown architecture
UNIMPLEMENTED;
Expand Down
10 changes: 2 additions & 8 deletions ecl/hql/hqlstack.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,8 @@
#define ALIGN_USES_ELEMENTSIZE
#define REGSIZE 8
#define REGPARAMS 8
#define ODD_STACK_ALIGNMENT
#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) \
&& defined(__ARM_EABI__) && !defined(__ARM_PCS_VFP) && !defined(__ARM_PCS)
#error "Can't identify floating point calling conventions.\nPlease ensure that your toolchain defines __ARM_PCS or __ARM_PCS_VFP."
#endif
#if defined(__ARM_PCS_VFP)
#define MAXFPREGS 8 // d0-d7
#endif
#define MAXFPREGS 8
#define EVEN_STACK_ALIGNMENT
#elif defined (_ARCH_ARM32_)
#define ALIGNMENT 4
#define ALIGN_USES_ELEMENTSIZE
Expand Down
48 changes: 47 additions & 1 deletion plugins/exampleplugin/exampleplugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ECL_EXAMPLE_PLUGIN_API bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)

pb->magicVersion = PLUGIN_VERSION;
pb->version = EXAMPLE_PLUGIN_VERSION;
pb->moduleName = "lib_redis";
pb->moduleName = "lib_exampleplugin";
pb->ECL = NULL;
pb->flags = PLUGIN_IMPLICIT_MODULE;
pb->description = "ECL plugin library for BLAH BLAH BLAH";
Expand All @@ -64,4 +64,50 @@ ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL func2 (ICodeContext * _ctx,
return;
}

/**
 * Constant-folding test helper: formats ten integer arguments of mixed width
 * and signedness into one space-separated string.
 *
 * @param returnLength  receives the length of the formatted text
 * @param returnValue   receives the buffer detached from the formatted string
 * @param p1,p2,p3      unsigned 8/16/32-bit values, printed with %u
 * @param p4            unsigned 64-bit value, printed with %llu
 * @param p5,p6,p7      signed 8/16/32-bit values, printed with %d
 * @param p8            signed 64-bit value, printed with %lld
 * @param p9,p10        unsigned 64-bit values, printed with %llu
 */
ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL test1(size32_t & returnLength, char * & returnValue,
    uint8_t p1, uint16_t p2, uint32_t p3, __uint64 p4,
    char p5, int16_t p6, int32_t p7, __int64 p8, __uint64 p9, __uint64 p10)
{
    // p5 is declared as INTEGER1 on the ECL side. Plain char is unsigned on some
    // ABIs (for example AArch64 Linux), where a negative value would otherwise be
    // promoted to 128..255 and printed as such by %d. Converting to signed char
    // first keeps the output identical on every platform.
    VStringBuffer buffer("%u %u %u %llu %d %d %d %lld %llu %llu",
        p1, p2, p3, p4, static_cast<signed char>(p5), p6, p7, p8, p9, p10);
    // Read the length before detaching the buffer.
    returnLength = buffer.length();
    returnValue = buffer.detach();
}

/**
 * Constant-folding test helper: formats four float and four double arguments,
 * each with three decimal places, into one space-separated string.
 *
 * @param returnLength  receives the length of the formatted text
 * @param returnValue   receives the buffer detached from the formatted string
 * @param p1,p2,p3,p4   single-precision inputs
 * @param p5,p6,p7,p8   double-precision inputs
 */
ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL test2(size32_t & returnLength, char * & returnValue,
    float p1, float p2, float p3, float p4,
    double p5, double p6, double p7, double p8)
{
    VStringBuffer formatted("%.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f",
        p1, p2, p3, p4,
        p5, p6, p7, p8);
    // Read the length before detaching the buffer.
    returnLength = formatted.length();
    returnValue = formatted.detach();
}

/**
 * Constant-folding test helper: formats a mix of ten integer arguments followed
 * by eight floating point arguments into one space-separated string.
 *
 * @param returnLength  receives the length of the formatted text
 * @param returnValue   receives the buffer detached from the formatted string
 * @param p1,p2,p3      unsigned 8/16/32-bit values, printed with %u
 * @param p4            unsigned 64-bit value, printed with %llu
 * @param p5,p6,p7      signed 8/16/32-bit values, printed with %d
 * @param p8            signed 64-bit value, printed with %lld
 * @param p9,p10        unsigned 64-bit values, printed with %llu
 * @param r1,r2,r3,r4   single-precision values, printed with %.3f
 * @param r5,r6,r7,r8   double-precision values, printed with %.3f
 */
ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL test3(size32_t & returnLength, char * & returnValue,
    uint8_t p1, uint16_t p2, uint32_t p3, __uint64 p4,
    char p5, int16_t p6, int32_t p7, __int64 p8, __uint64 p9, __uint64 p10,
    float r1, float r2, float r3, float r4,
    double r5, double r6, double r7, double r8)
{
    // p5 is declared as INTEGER1 on the ECL side. Plain char is unsigned on some
    // ABIs (for example AArch64 Linux), where a negative value would otherwise be
    // promoted to 128..255 and printed as such by %d. Converting to signed char
    // first keeps the output identical on every platform.
    VStringBuffer buffer("%u %u %u %llu %d %d %d %lld %llu %llu",
        p1, p2, p3, p4, static_cast<signed char>(p5), p6, p7, p8, p9, p10);
    // The floating point values follow the integers, separated by a single space.
    buffer.appendf(" %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f", r1, r2, r3, r4, r5, r6, r7, r8);
    // Read the length before detaching the buffer.
    returnLength = buffer.length();
    returnValue = buffer.detach();
}

/**
 * Constant-folding test helper for string parameters: joins the four string
 * arguments with commas.
 *
 * @param returnLength  receives the length of the formatted text
 * @param returnValue   receives the buffer detached from the formatted string
 * @param slen          number of characters of s to print
 * @param s             character data printed with an explicit length (%.*s)
 * @param s10           character data printed as at most 10 characters (%.10s)
 * @param v             zero-terminated string (%s)
 * @param v10           zero-terminated string (%s)
 */
ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL test4(size32_t & returnLength, char * & returnValue,
    size32_t slen, const char *s, const char *s10, const char *v, const char *v10)
{
    VStringBuffer joined("%.*s,%.10s,%s,%s", slen, s, s10, v, v10);
    // Read the length before detaching the buffer.
    returnLength = joined.length();
    returnValue = joined.detach();
}


}//close namespace
12 changes: 12 additions & 0 deletions plugins/exampleplugin/lib_exampleplugin.ecllib
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,18 @@
// Service definition binding ECL prototypes to the C++ entry points of the 'exampleplugin' plugin
// (C++ namespace 'ExamplePlugin').
EXPORT exampleplugin := SERVICE : plugin('exampleplugin'), namespace('ExamplePlugin')
UNSIGNED4 func1(CONST VARSTRING param1, CONST VARSTRING param2, UNSIGNED4 param3) : cpp,action,context,time,entrypoint='func1';
STRING func2(CONST VARSTRING param1, CONST VARSTRING param2, CONST STRING param3) : cpp,action,context,fold,entrypoint='func2';
// The test1..test4 entries below are used for testing compile-time constant folding, and also
// illustrate how parameters of different types are passed to the C++ implementation.
// test1: multiple integer parameters of different sizes, both unsigned and signed.
STRING test1(UNSIGNED1 p1, UNSIGNED2 p2, UNSIGNED4 p3, UNSIGNED8 p4, INTEGER1 p5, INTEGER2 p6, INTEGER4 p7, INTEGER8 p8, UNSIGNED p9, UNSIGNED p10) : cpp,fold,entrypoint='test1';
// test2: multiple floating point parameters of different sizes (four REAL4 followed by four REAL8).
// Note that calls with more than 8 floating point parameters cannot be folded.
STRING test2(REAL4 p1, REAL4 p2, REAL4 p3, REAL4 p4, REAL8 p5, REAL8 p6, REAL8 p7, REAL8 p8) : cpp,fold,entrypoint='test2';
// test3: a mix of integer and real parameters (the test1 integers followed by the test2 reals).
STRING test3(
UNSIGNED1 p1, UNSIGNED2 p2, UNSIGNED4 p3, UNSIGNED8 p4, INTEGER1 p5, INTEGER2 p6, INTEGER4 p7, INTEGER8 p8, UNSIGNED p9, UNSIGNED p10,
REAL4 r1, REAL4 r2, REAL4 r3, REAL4 r4, REAL8 r5, REAL8 r6, REAL8 r7, REAL8 r8) : cpp,fold,entrypoint='test3';
// test4: string parameters - variable length (STRING), fixed length (STRING10),
// and zero terminated (VARSTRING, VARSTRING10).
STRING test4(STRING s, STRING10 s10, VARSTRING v, VARSTRING10 v10) : cpp,fold,entrypoint='test4';
END;

EXPORT Wrapper(VARSTRING param1, VARSTRING param2 = '') := MODULE
Expand Down
6 changes: 6 additions & 0 deletions testing/regress/ecl/testfold.ecl
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
// Regression test for constant folding of external service calls.
// NOTE(review): every ASSERT below is flagged CONST, which presumably requires the condition to be
// evaluated at compile time - confirm against the ECL ASSERT documentation.
import lib_exampleplugin;

// Local service declaration for a function in the 'eclrtl' library; 'fold' is applied at service level.
s := service : fold,library('eclrtl')
integer4 rtlCompareVStrVStr(const varstring a, const varstring b) : pure,CPP;
END;

// String comparison: equal, greater than, and less than.
ASSERT(s.rtlCompareVStrVStr('1','1')=0, CONST);
ASSERT(s.rtlCompareVStrVStr('1','0')>0, CONST);
ASSERT(s.rtlCompareVStrVStr('1','2')<0, CONST);
// Integer parameters of mixed sizes.
ASSERT(exampleplugin.test1(1,2,3,4,5,6,7,8,9,10)= '1 2 3 4 5 6 7 8 9 10', CONST);
// Floating point parameters (REAL4 and REAL8).
ASSERT(exampleplugin.test2(1,2,3,4,5,6,7,8)= '1.000 2.000 3.000 4.000 5.000 6.000 7.000 8.000', CONST);
// Integer and floating point parameters combined in one call.
ASSERT(exampleplugin.test3(1,2,3,4,5,6,7,8,9,10,101,102,103,104,105,106,107,108) = '1 2 3 4 5 6 7 8 9 10 101.000 102.000 103.000 104.000 105.000 106.000 107.000 108.000', CONST);
// String parameters: STRING, STRING10, VARSTRING, VARSTRING10.
ASSERT(exampleplugin.test4('123', '456', '789', '101') = '123,456 ,789,101', CONST);
// Only produces output when all of the above assertions succeed - NOTE(review): confirm failure behaviour of ASSERT.
OUTPUT('ok');

0 comments on commit 1284138

Please sign in to comment.