Merge pull request #18940 from richardkchapman/arm64

HPCC-32171 ARM64 constant-folding support Reviewed-by: Gordon Smith <[email protected]> Reviewed-by: Gavin Halliday <[email protected]> Merged-by: Gavin Halliday <[email protected]>
hpcc-systems · Aug 28, 2024 · 1284138 · 1284138
2 parents 9505b2d + 8650bf3
commit 1284138
Show file tree

Hide file tree

Showing 7 changed files with 210 additions and 11 deletions.
diff --git a/cmake_modules/vcpkg.cmake b/cmake_modules/vcpkg.cmake
@@ -12,8 +12,13 @@ if(WIN32)
     set(VCPKG_HOST_TRIPLET "x64-windows" CACHE STRING "host triplet")
     set(VCPKG_TARGET_TRIPLET "x64-windows" CACHE STRING "target triplet")
 elseif(APPLE)
+  if (CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+    set(VCPKG_HOST_TRIPLET "arm64-osx" CACHE STRING "host triplet")
+    set(VCPKG_TARGET_TRIPLET "arm64-osx" CACHE STRING "target triplet")
+  else()
     set(VCPKG_HOST_TRIPLET "x64-osx" CACHE STRING "host triplet")
     set(VCPKG_TARGET_TRIPLET "x64-osx" CACHE STRING "target triplet")
+  endif()
 elseif(UNIX)
     set(VCPKG_HOST_TRIPLET "x64-linux-dynamic" CACHE STRING "host triplet")
     set(VCPKG_TARGET_TRIPLET "x64-linux-dynamic" CACHE STRING "target triplet")

diff --git a/common/thorhelper/thorcommon.hpp b/common/thorhelper/thorcommon.hpp
@@ -362,8 +362,9 @@ class BlockedActivityTimer
     }
 };
 #else
-struct ActivityTimer
+class ActivityTimer  // Stub selected by the preceding #else: same constructor shape, no behaviour
 {
+public:
     inline ActivityTimer(ActivityTimeAccumulator &_accumulator, const bool _enabled) { }  // both arguments are accepted and ignored
 };
 struct SimpleActivityTimer

diff --git a/ecl/hql/hqlfold.cpp b/ecl/hql/hqlfold.cpp
@@ -1248,7 +1248,142 @@ IValue * doFoldExternalCall(IHqlExpression* expr, unsigned foldOptions, const ch
         }
  #elif defined(_ARCH_ARM64_)
         // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
-        UNIMPLEMENTED;
+  #ifdef MAXFPREGS
+        // Load the floating point arguments into v0-v7 ahead of the call.
+        // floatSizes[i] is the byte size of the value held in the i'th 8-byte slot of floatstack:
+        // 4 loads a single (sN), anything larger loads a double (dN), anything smaller ends the list.
+        // NOTE(review): nothing tells the compiler that v0-v7 must stay intact until the blr in the
+        // next asm statement - the code between the two statements must remain integer-only.
+        void * floatstack = fstack.getFloatMem();
+        if (floatstack) {
+            unsigned * floatSizes = fstack.getFloatSizes();
+           __asm__ __volatile__ (
+               // Only numeric local labels are used: a named label would be emitted as a real symbol
+               // and clash if the compiler ever duplicated this asm block.
+               "ldr  w0,[%[sizes],#0] \n\t"
+               "cmp  w0, #4 \n\t"
+               "blt  9f \n\t"
+               "beq  0f \n\t"
+               "ldr  d0,[%[vals], #0] \n\t"
+               "b 1f \n\t"
+           "0: \n\t"
+               "ldr  s0,[%[vals], #0] \n\t"
+           "1: \n\t"
+               "ldr  w0,[%[sizes],#4] \n\t"
+               "cmp  w0, #4 \n\t"
+               "blt  9f \n\t"
+               "beq  0f \n\t"
+               "ldr  d1,[%[vals], #8] \n\t"
+               "b 1f \n\t"
+           "0: \n\t"
+               "ldr  s1,[%[vals], #8] \n\t"
+           "1: \n\t"
+               "ldr  w0,[%[sizes],#8] \n\t"
+               "cmp  w0, #4 \n\t"
+               "blt  9f \n\t"
+               "beq  0f \n\t"
+               "ldr  d2,[%[vals], #16] \n\t"
+               "b 1f \n\t"
+           "0: \n\t"
+               "ldr  s2,[%[vals], #16] \n\t"
+           "1: \n\t"
+               "ldr  w0,[%[sizes],#12] \n\t"
+               "cmp  w0, #4 \n\t"
+               "blt  9f \n\t"
+               "beq  0f \n\t"
+               "ldr  d3,[%[vals], #24] \n\t"
+               "b 1f \n\t"
+           "0: \n\t"
+               "ldr  s3,[%[vals], #24] \n\t"
+           "1: \n\t"
+               "ldr  w0,[%[sizes],#16] \n\t"
+               "cmp  w0, #4 \n\t"
+               "blt  9f \n\t"
+               "beq  0f \n\t"
+               "ldr  d4,[%[vals], #32] \n\t"
+               "b 1f \n\t"
+           "0: \n\t"
+               "ldr  s4,[%[vals], #32] \n\t"
+           "1: \n\t"
+               "ldr  w0,[%[sizes],#20] \n\t"
+               "cmp  w0, #4 \n\t"
+               "blt  9f \n\t"
+               "beq  0f \n\t"
+               "ldr  d5,[%[vals], #40] \n\t"
+               "b 1f \n\t"
+           "0: \n\t"
+               "ldr  s5,[%[vals], #40] \n\t"
+           "1: \n\t"
+               "ldr  w0,[%[sizes],#24] \n\t"
+               "cmp  w0, #4 \n\t"
+               "blt  9f \n\t"
+               "beq  0f \n\t"
+               "ldr  d6,[%[vals], #48] \n\t"
+               "b 1f \n\t"
+           "0: \n\t"
+               "ldr  s6,[%[vals], #48] \n\t"
+           "1: \n\t"
+               "ldr  w0,[%[sizes],#28] \n\t"
+               "cmp  w0, #4 \n\t"
+               "blt  9f \n\t"
+               "beq  0f \n\t"
+               "ldr  d7,[%[vals], #56] \n\t"
+               "b 9f \n\t"
+           "0: \n\t"
+               "ldr  s7,[%[vals], #56] \n\t"
+           "9: \n\t"
+               "nop \n\t"
+            :
+            : [vals] "r"(floatstack), [sizes] "r"(floatSizes)
+            // x0 is the scratch register, cmp alters the flags, the loads read memory the compiler
+            // cannot see through the operands, and v0-v7 are overwritten.
+            : "x0", "cc", "memory",
+              "v0","v1","v2","v3","v4","v5","v6","v7"
+           );
+        }
+  #endif
+        // Copy the len bytes of marshalled arguments at strbuf onto the machine stack, pop the first
+        // REGPARAMS*REGSIZE (64) bytes into x0-x7, call fh, then drop the remaining stack arguments.
+        // The integer result is taken from x0.
+        assertex((len & 15) == 0);                                              // Stack must always be 16-byte aligned
+        const void * _src = strbuf;                                             // Scratch copy - the copy loop advances it
+        register unsigned __int64 _int64result asm("x0");                       // Specific register for result
+        register unsigned __int64 _len asm("x1") = len;                         // Counted down to zero by the copy loop
+        register unsigned __int64 _poplen asm("x19") = len-REGPARAMS*REGSIZE;   // Needs to survive the call
+        register void *_fh asm("x8") = fh;                                      // Needs to survive until the call
+        __asm__ __volatile__ (
+            "sub sp, sp, %[_len] \n\t"        // Make space on stack
+            "mov x2, sp \n\t"                 // x2 = destination for loop
+            "1: \n\t"
+            "ldrb w3, [%[strbuf]], #1 \n\t"   // copy a byte from src array to w3
+            "strb w3, [x2], #1 \n\t"          // and then from w3 onto stack
+            "subs %[_len], %[_len], #1 \n\t"  // decrement and repeat
+            "bne 1b \n\t"
+            "ldp x0, x1, [sp] \n\t"
+            "ldp x2, x3, [sp, #16] \n\t"
+            "ldp x4, x5, [sp, #32] \n\t"
+            "ldp x6, x7, [sp, #48] \n\t"
+            "add sp, sp, #64 \n\t"            // first 8 parameters go in registers
+            "blr %[fh] \n\t"                  // make the call
+            "add sp, sp, %[_poplen] \n\t"     // Restore stack pointer (note, have already popped 8 registers, so poplen is len - 64)
+            // _len, the source pointer and x8 are all modified (by the loop, the ldp, or potentially
+            // by the callee), so they are read-write operands rather than inputs.
+            : "=r"(_int64result), [_len] "+r"(_len), [strbuf] "+r"(_src), [fh] "+r"(_fh)
+            : [_poplen] "r"(_poplen)
+            // Everything else the called function is allowed to corrupt under AAPCS64, plus the flags
+            // and memory (the callee reads its arguments and writes its results through pointers).
+            // NOTE(review): x18 (platform register) is deliberately not listed - confirm per target.
+            : "x2","x3","x4","x5","x6","x7",
+              "x9","x10","x11","x12","x13","x14","x15","x16","x17","lr",
+              "v0","v1","v2","v3","v4","v5","v6","v7",
+              "v16","v17","v18","v19","v20","v21","v22","v23",
+              "v24","v25","v26","v27","v28","v29","v30","v31",
+              "cc","memory"
+            );
+        int64result = _int64result;
+        if (isRealvalue)
+        {
+  #ifdef MAXFPREGS
+            // Floating point results come back in s0 (single) or d0 (double).
+            // NOTE(review): relies on v0 not being reused by the compiler since the call above.
+            if(resultsize <= 4)
+            {
+                __asm__  __volatile__(
+                    "str  s0,[%[fresult]] \n\t"
+                    :"=m"(floatresult)
+                    : [fresult] "r"(&(floatresult))
+                );
+            }
+            else
+            {
+                __asm__  __volatile__(
+                    "str  d0,[%[fresult]] \n\t"
+                    :"=m"(doubleresult)
+                    : [fresult] "r"(&(doubleresult))
+                );
+            }
+  #else
+            // Presumably a convention that returns reals in the integer result register: reinterpret
+            // the value captured from x0 (this branch assigns int64result, not intresult).
+            if(resultsize <= 4)
+                floatresult = *(float*)&int64result;
+            else
+                doubleresult = *(double*)&int64result;
+  #endif
+        }
  #else
         // Unknown architecture
         UNIMPLEMENTED;

diff --git a/ecl/hql/hqlstack.hpp b/ecl/hql/hqlstack.hpp
@@ -58,14 +58,8 @@
  #define ALIGN_USES_ELEMENTSIZE
  #define REGSIZE 8
  #define REGPARAMS 8
- #define ODD_STACK_ALIGNMENT
- #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) \
-     && defined(__ARM_EABI__) && !defined(__ARM_PCS_VFP) && !defined(__ARM_PCS)
-  #error "Can't identify floating point calling conventions.\nPlease ensure that your toolchain defines __ARM_PCS or __ARM_PCS_VFP."
- #endif
- #if defined(__ARM_PCS_VFP)
-  #define MAXFPREGS 8 // d0-d7
- #endif
+ #define MAXFPREGS 8
+ #define EVEN_STACK_ALIGNMENT
 #elif defined (_ARCH_ARM32_)
  #define ALIGNMENT 4
  #define ALIGN_USES_ELEMENTSIZE

diff --git a/plugins/exampleplugin/exampleplugin.cpp b/plugins/exampleplugin/exampleplugin.cpp
@@ -37,7 +37,7 @@ ECL_EXAMPLE_PLUGIN_API bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
 
     pb->magicVersion = PLUGIN_VERSION;
     pb->version = EXAMPLE_PLUGIN_VERSION;
-    pb->moduleName = "lib_redis";
+    pb->moduleName = "lib_exampleplugin";
     pb->ECL = NULL;
     pb->flags = PLUGIN_IMPLICIT_MODULE;
     pb->description = "ECL plugin library for BLAH BLAH BLAH";
@@ -64,4 +64,50 @@ ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL func2 (ICodeContext * _ctx,
     return;
 }
 
+// Constant-folding test entry point: formats ten integer arguments of differing widths and
+// signedness as space-separated decimal text.
+//   returnLength - receives the length of the formatted text
+//   returnValue  - receives the buffer detached from the local string builder
+//   p1..p10      - the values to print, in order
+ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL test1(size32_t & returnLength, char * & returnValue,
+    uint8_t p1, uint16_t p2, uint32_t p3, __uint64 p4,
+    char p5, int16_t p6, int32_t p7, __int64 p8, __uint64 p9, __uint64 p10)
+{
+    // Plain char has implementation-defined signedness (it is unsigned on AArch64 Linux), so view
+    // p5 as signed char before printing with %d; otherwise negative INTEGER1 values print as 128..255.
+    const int signedP5 = static_cast<signed char>(p5);
+    VStringBuffer buffer("%u %u %u %llu %d %d %d %lld %llu %llu", p1, p2, p3, p4, signedP5, p6, p7, p8, p9, p10);
+    returnLength = buffer.length();
+    returnValue = buffer.detach();
+}
+
+// Constant-folding test entry point: renders four float and four double arguments to three
+// decimal places, space separated.
+//   returnLength - receives the length of the formatted text
+//   returnValue  - receives the buffer detached from the local string builder
+ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL test2(size32_t & returnLength, char * & returnValue,
+    float p1, float p2, float p3, float p4,
+    double p5, double p6, double p7, double p8)
+{
+    VStringBuffer formatted("%.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f",
+                            p1, p2, p3, p4,
+                            p5, p6, p7, p8);
+    returnLength = formatted.length();
+    returnValue = formatted.detach();
+}
+
+// Constant-folding test entry point mixing integer and floating point arguments: prints the ten
+// integers in decimal followed by the eight reals to three decimal places, all space separated.
+//   returnLength - receives the length of the formatted text
+//   returnValue  - receives the buffer detached from the local string builder
+//   p1..p10      - integer values, printed first
+//   r1..r8       - real values, printed after the integers
+ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL test3(size32_t & returnLength, char * & returnValue,
+    uint8_t p1, uint16_t p2, uint32_t p3, __uint64 p4,
+    char p5, int16_t p6, int32_t p7, __int64 p8, __uint64 p9, __uint64 p10,
+    float r1, float r2, float r3, float r4,
+    double r5, double r6, double r7, double r8)
+{
+    // Plain char has implementation-defined signedness (it is unsigned on AArch64 Linux), so view
+    // p5 as signed char before printing with %d; otherwise negative INTEGER1 values print as 128..255.
+    const int signedP5 = static_cast<signed char>(p5);
+    VStringBuffer buffer("%u %u %u %llu %d %d %d %lld %llu %llu", p1, p2, p3, p4, signedP5, p6, p7, p8, p9, p10);
+    buffer.appendf(" %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f", r1, r2, r3, r4, r5, r6, r7, r8);
+    returnLength = buffer.length();
+    returnValue = buffer.detach();
+}
+
+// Constant-folding test entry point for string arguments: joins the four strings with commas.
+//   returnLength - receives the length of the formatted text
+//   returnValue  - receives the buffer detached from the local string builder
+//   slen, s      - length-prefixed string, printed with an explicit precision of slen
+//   s10          - printed with a fixed precision of 10 characters
+//   v, v10       - printed as zero-terminated strings
+ECL_EXAMPLE_PLUGIN_API void ECL_EXAMPLE_PLUGIN_CALL test4(size32_t & returnLength, char * & returnValue,
+    size32_t slen, const char *s,
+    const char *s10,
+    const char *v,
+    const char *v10)
+{
+    VStringBuffer joined("%.*s,%.10s,%s,%s",
+                         slen, s,
+                         s10,
+                         v,
+                         v10);
+    returnLength = joined.length();
+    returnValue = joined.detach();
+}
+
+
 }//close namespace
diff --git a/plugins/exampleplugin/lib_exampleplugin.ecllib b/plugins/exampleplugin/lib_exampleplugin.ecllib
@@ -19,6 +19,18 @@
 EXPORT exampleplugin := SERVICE : plugin('exampleplugin'), namespace('ExamplePlugin')
   UNSIGNED4 func1(CONST VARSTRING param1, CONST VARSTRING param2, UNSIGNED4 param3) : cpp,action,context,time,entrypoint='func1';
   STRING func2(CONST VARSTRING param1, CONST VARSTRING param2, CONST STRING param3) : cpp,action,context,fold,entrypoint='func2';
+  // test1-test4 are used for testing compile-time constant folding, and also illustrate how each kind of parameter is passed to C++
+  // test1: multiple integer parameters of different sizes and signedness
   STRING test1(UNSIGNED1 p1, UNSIGNED2 p2, UNSIGNED4 p3, UNSIGNED8 p4, INTEGER1 p5, INTEGER2 p6, INTEGER4 p7, INTEGER8 p8, UNSIGNED p9, UNSIGNED p10) : cpp,fold,entrypoint='test1';
+  // test2: multiple floating point parameters of different sizes (REAL4 and REAL8)
+  // Note that more than 8 FP parameters cannot be folded
   STRING test2(REAL4 p1, REAL4 p2, REAL4 p3, REAL4 p4, REAL8 p5, REAL8 p6, REAL8 p7, REAL8 p8) : cpp,fold,entrypoint='test2';
+  // test3: a mix of integer and real parameters in a single call
   STRING test3(
      UNSIGNED1 p1, UNSIGNED2 p2, UNSIGNED4 p3, UNSIGNED8 p4, INTEGER1 p5, INTEGER2 p6, INTEGER4 p7, INTEGER8 p8, UNSIGNED p9, UNSIGNED p10,
      REAL4 r1, REAL4 r2, REAL4 r3, REAL4 r4, REAL8 r5, REAL8 r6, REAL8 r7, REAL8 r8) : cpp,fold,entrypoint='test3';
+  // test4: strings - variable length (STRING), fixed length (STRING10) and zero terminated (VARSTRING, VARSTRING10)
   STRING test4(STRING s, STRING10 s10, VARSTRING v, VARSTRING10 v10) : cpp,fold,entrypoint='test4';
 END;
 
 EXPORT Wrapper(VARSTRING param1, VARSTRING  param2 = '') := MODULE

diff --git a/testing/regress/ecl/testfold.ecl b/testing/regress/ecl/testfold.ecl
@@ -1,8 +1,14 @@
+import lib_exampleplugin;
+
 s := service : fold,library('eclrtl')
   integer4 rtlCompareVStrVStr(const varstring a, const varstring b) : pure,CPP;
 END;
 
 ASSERT(s.rtlCompareVStrVStr('1','1')=0, CONST);
 ASSERT(s.rtlCompareVStrVStr('1','0')>0, CONST);
 ASSERT(s.rtlCompareVStrVStr('1','2')<0, CONST);
+ASSERT(exampleplugin.test1(1,2,3,4,5,6,7,8,9,10)= '1 2 3 4 5 6 7 8 9 10', CONST);  // ten integer arguments
+ASSERT(exampleplugin.test2(1,2,3,4,5,6,7,8)= '1.000 2.000 3.000 4.000 5.000 6.000 7.000 8.000', CONST);  // eight real arguments
+ASSERT(exampleplugin.test3(1,2,3,4,5,6,7,8,9,10,101,102,103,104,105,106,107,108) = '1 2 3 4 5 6 7 8 9 10 101.000 102.000 103.000 104.000 105.000 106.000 107.000 108.000', CONST);  // ten integers followed by eight reals
+ASSERT(exampleplugin.test4('123', '456', '789', '101') = '123,456       ,789,101', CONST);  // the STRING10 argument '456' comes back space-padded to 10 characters
 OUTPUT('ok');