diff --git a/README.md b/README.md index 7917affc..fc4e9f22 100644 --- a/README.md +++ b/README.md @@ -27,14 +27,20 @@ YASK contains a domain-specific compiler to convert stencil-equation specificati for multi-socket and multi-node operation or Intel(R) Parallel Studio XE Composer Edition for C++ Linux for single-socket only - (2018 or later; 2019 update 5 (2019.0.5 or 2019u5) or later recommended). + (2018 or later; 2019 update 5 (2019.0.5) recommended; + 2019.1.x has an issue that prevents compilation). * There was an issue in Intel(R) MPI versions 2019u1 and 2019u2 that caused the application to crash when allocating very large shared-memory (shm) regions, so those versions are not recommended when using the `-use_shm` feature. This issue was resolved in MPI version 2019u3. + * There is an issue in the Intel C++ compiler 2019.1.x that causes + an internal error when building YASK kernels. + Use 2019.0.5 to work around until a patch is released. * If you are using g++ version 8.x or later, Intel(R) C++ version 2019 is required. + * Building a YASK kernel with clang is possible; however, + SIMD operations for functions such as sin() are not supported. * Building a YASK kernel with the Gnu C++ compiler is possible. Limited testing with g++ 8.2.0 shows the "iso3dfd" kernel runs about 30% slower compared to the same kernel built with diff --git a/src/common/common_utils.cpp b/src/common/common_utils.cpp index 36863f30..d3fbe57a 100644 --- a/src/common/common_utils.cpp +++ b/src/common/common_utils.cpp @@ -43,7 +43,7 @@ namespace yask { // for numbers above 9 (at least up to 99). // Format: "major.minor.patch". 
- const string version = "3.03.00"; + const string version = "3.03.01"; string yask_get_version_string() { return version; diff --git a/src/kernel/lib/alloc.cpp b/src/kernel/lib/alloc.cpp index 59c0b627..b341a04b 100644 --- a/src/kernel/lib/alloc.cpp +++ b/src/kernel/lib/alloc.cpp @@ -35,7 +35,7 @@ namespace yask { static inline int getnode() { #ifdef SYS_getcpu int node, status; - status = syscall(SYS_getcpu, NULL, &node, NULL); + status = syscall(SYS_getcpu, NULL, &node, NULL); return (status == -1) ? status : node; #else return -1; // unavailable diff --git a/src/kernel/lib/realv.hpp b/src/kernel/lib/realv.hpp index 2eee84bc..df6866be 100644 --- a/src/kernel/lib/realv.hpp +++ b/src/kernel/lib/realv.hpp @@ -218,9 +218,9 @@ namespace yask { // copy whole vector. ALWAYS_INLINE real_vec_t& operator=(const real_vec_t& rhs) { #ifdef NO_INTRINSICS - REAL_VEC_LOOP(i) u.r[i] = rhs[i]; + REAL_VEC_LOOP(i) u.ci[i] = rhs.u.ci[i]; #else - u.mr = rhs.u.mr; + u.mi = rhs.u.mi; #endif return *this; } @@ -233,6 +233,10 @@ namespace yask { u.mr = rhs; return *this; } + ALWAYS_INLINE real_vec_t& operator=(const isimd_t& rhs) { + u.mi = rhs; + return *this; + } #endif // assignment: single value broadcast. @@ -360,38 +364,37 @@ namespace yask { } // less-than comparator. + // uses int comparison for reliable results, even w/NaNs. bool operator<(const real_vec_t& rhs) const { for (int j = 0; j < VLEN; j++) { - if (u.r[j] < rhs.u.r[j]) + if (u.ci[j] < rhs.u.ci[j]) return true; - else if (u.r[j] > rhs.u.r[j]) + else if (u.ci[j] > rhs.u.ci[j]) return false; } return false; } // greater-than comparator. + // uses int comparison for reliable results, even w/NaNs. bool operator>(const real_vec_t& rhs) const { for (int j = 0; j < VLEN; j++) { - if (u.r[j] > rhs.u.r[j]) + if (u.ci[j] > rhs.u.ci[j]) return true; - else if (u.r[j] < rhs.u.r[j]) + else if (u.ci[j] < rhs.u.ci[j]) return false; } return false; } // equal-to comparator. + // uses int comparison for reliable results, even w/NaNs. 
ALWAYS_INLINE bool operator==(const real_vec_t& rhs) const { - #ifdef NO_SIMD_COMPARE for (int j = 0; j < VLEN; j++) { - if (u.r[j] != rhs.u.r[j]) + if (u.ci[j] != rhs.u.ci[j]) return false; } return true; - #else - return u.r == rhs.u.r; - #endif } // not-equal-to comparator.