From 33be1b17ea0ec580b8e62dc626863a378960c22c Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Thu, 16 May 2024 13:57:48 +0100 Subject: [PATCH 01/13] Update oclmath to add AArch64 support. This replaces fpcontrol.h and rounding_mode.cpp/.h with the versions from OpenCL CTS. The old version included in SYCL-CTS did not support AArch64, and although it would be possible to just make the smallest possible change to get that to work, it seems like it will be easier for future changes for the OpenCL and SYCL versions of these files to be the same. --- oclmath/fpcontrol.h | 172 +++++++++------ oclmath/rounding_mode.cpp | 428 ++++++++++++++++++++------------------ oclmath/rounding_mode.h | 97 +++++---- 3 files changed, 381 insertions(+), 316 deletions(-) diff --git a/oclmath/fpcontrol.h b/oclmath/fpcontrol.h index d4acf1849..cf9e9569d 100644 --- a/oclmath/fpcontrol.h +++ b/oclmath/fpcontrol.h @@ -1,75 +1,125 @@ - +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// #ifndef _fpcontrol_h #define _fpcontrol_h -// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware -// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations -// in integer code, and have found this is the only way to correctly verify operation. +#include + +// In order to get tests for correctly rounded operations (e.g. multiply) to +// work properly we need to be able to set the reference hardware to FTZ mode if +// the device hardware is running in that mode. We have explored all other +// options short of writing correctly rounded operations in integer code, and +// have found this is the only way to correctly verify operation. // -// Non-Apple implementations will need to provide their own implentation for these features. If the reference hardware and device are both -// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default -// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode. -#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__) - typedef int FPU_mode_type; -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ ) - #include -#elif defined( __PPC__ ) - #include - extern __thread fpu_control_t fpu_control; -#endif - // Set the reference hardware floating point unit to FTZ mode - static inline void ForceFTZ( FPU_mode_type *mode ) - { -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr( *mode | 0x8040); -#elif defined( __PPC__ ) - *mode = fpu_control; - fpu_control |= _FPU_MASK_NI; -#elif defined ( __arm__ ) - unsigned fpscr; - __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; - __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24))); +// Non-Apple implementations will need to provide their own implentation for +// these features. If the reference hardware and device are both running in the +// same state (either FTZ or IEEE compliant modes) then these functions may be +// empty. If the device is running in non-default rounding mode (e.g. round +// toward zero), then these functions should also set the reference device into +// that rounding mode. +#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) || \ + defined(__MINGW32__) +#ifdef _MSC_VER +typedef int FPU_mode_type; #else - #error ForceFTZ needs an implentation +typedef int64_t FPU_mode_type; #endif - } - - // Disable the denorm flush to zero - static inline void DisableFTZ( FPU_mode_type *mode ) - { -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr( *mode & ~0x8040); -#elif defined( __PPC__ ) - *mode = fpu_control; - fpu_control &= ~_FPU_MASK_NI; -#elif defined ( __arm__ ) - unsigned fpscr; - __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; - __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24))); +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \ + defined(__MINGW32__) +#include +#elif defined(__PPC__) +#include +extern __thread fpu_control_t fpu_control; +#elif defined(__mips__) +#include "mips/m32c1.h" +#endif +// Set the reference hardware floating point unit to FTZ mode +inline void ForceFTZ(FPU_mode_type* mode) { +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \ + defined(__MINGW32__) + *mode = _mm_getcsr(); + _mm_setcsr(*mode | 0x8040); +#elif defined(__PPC__) + *mode = fpu_control; + fpu_control |= _FPU_MASK_NI; +#elif defined(__arm__) + unsigned fpscr; + __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24))); + // Add 64 bit support +#elif defined(__aarch64__) + uint64_t fpscr; + __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24))); +#elif defined(__mips__) + fpa_bissr(FPA_CSR_FS); +#else +#error ForceFTZ needs an implentation +#endif +} + +// Disable the denorm flush to zero +inline void DisableFTZ(FPU_mode_type* mode) { +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \ + defined(__MINGW32__) + *mode = _mm_getcsr(); + _mm_setcsr(*mode & ~0x8040); +#elif defined(__PPC__) + *mode = fpu_control; + fpu_control &= ~_FPU_MASK_NI; +#elif defined(__arm__) + unsigned fpscr; + __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24))); + // Add 64 bit support +#elif defined(__aarch64__) + uint64_t fpscr; + __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24))); +#elif defined(__mips__) + fpa_bicsr(FPA_CSR_FS); #else #error DisableFTZ needs an implentation -#endif - } +#endif +} - // Restore the reference hardware to floating point state indicated by *mode - static inline void RestoreFPState( FPU_mode_type *mode ) - { -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) - _mm_setcsr( *mode ); -#elif defined( __PPC__) - fpu_control = *mode; -#elif defined (__arm__) - __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode)); +// Restore the reference hardware to floating point state indicated by *mode +inline void RestoreFPState(FPU_mode_type* mode) { +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \ + defined(__MINGW32__) + _mm_setcsr(*mode); +#elif defined(__PPC__) + fpu_control = *mode; +#elif defined(__arm__) + __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode)); + // Add 64 bit support +#elif defined(__aarch64__) + __asm__ volatile("msr fpcr, %0" ::"r"(*mode)); +#elif defined(__mips__) + // Mips runs by default with DAZ=1 FTZ=1 #else - #error RestoreFPState needs an implementation +#error RestoreFPState needs an implementation #endif - } +} #else - #error ForceFTZ and RestoreFPState need implentations +#error ForceFTZ and RestoreFPState need implentations #endif -#endif \ No newline at end of file +#endif diff --git a/oclmath/rounding_mode.cpp b/oclmath/rounding_mode.cpp index 399e40414..24e8ea999 100644 --- a/oclmath/rounding_mode.cpp +++ b/oclmath/rounding_mode.cpp @@ -1,238 +1,254 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "rounding_mode.h" -/****************************************************************** - // - // OpenCL Conformance Tests - // - // Copyright: (c) 2008-2013 by Apple Inc. All Rights Reserved. - // - ******************************************************************/ - +#if (defined(__arm__) || defined(__aarch64__)) +#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode +#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode: -#include "rounding_mode.h" +#define _ARM_FE_FTZ 0x1000000 +#define _ARM_FE_NFTZ 0x0 +#if defined(__aarch64__) +#define _FPU_GETCW(cw) __asm__("MRS %0,FPCR" : "=r"(cw)) +#define _FPU_SETCW(cw) __asm__("MSR FPCR,%0" : : "ri"(cw)) +#else +#define _FPU_GETCW(cw) __asm__("VMRS %0,FPSCR" : "=r"(cw)) +#define _FPU_SETCW(cw) __asm__("VMSR FPSCR,%0" : : "ri"(cw)) +#endif +#endif -#if !(defined(_WIN32) && defined(_MSC_VER)) -RoundingMode set_round( RoundingMode r, Type outType ) -{ - static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO }; - static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO }; - const int *p = int_rounds; - if( outType == kfloat || outType == kdouble ) - p = flt_rounds; - int oldRound = fegetround(); - fesetround( p[r] ); - - switch( oldRound ) - { - case FE_TONEAREST: - return kRoundToNearestEven; - case FE_UPWARD: - return kRoundUp; - case FE_DOWNWARD: - return kRoundDown; - case FE_TOWARDZERO: - return kRoundTowardZero; - default: - abort(); // ??! - } - return kDefaultRoundingMode; //never happens +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) +#define _ARM_FE_TONEAREST 0x0 +#define _ARM_FE_UPWARD 0x400000 +#define _ARM_FE_DOWNWARD 0x800000 +#define _ARM_FE_TOWARDZERO 0xc00000 +RoundingMode set_round(RoundingMode r, Type outType) { + static const int flt_rounds[kRoundingModeCount] = { + _ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, + _ARM_FE_TOWARDZERO}; + static const int int_rounds[kRoundingModeCount] = { + _ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, + _ARM_FE_TOWARDZERO}; + const int* p = int_rounds; + if (outType == kfloat || outType == kdouble) p = flt_rounds; + + int64_t fpscr = 0; + RoundingMode oldRound = get_round(); + + _FPU_GETCW(fpscr); + _FPU_SETCW(p[r] | (fpscr & ~FPSCR_ROUND_MASK)); + + return oldRound; } -RoundingMode get_round( void ) -{ - int oldRound = fegetround(); - - switch( oldRound ) - { - case FE_TONEAREST: - return kRoundToNearestEven; - case FE_UPWARD: - return kRoundUp; - case FE_DOWNWARD: - return kRoundDown; - case FE_TOWARDZERO: - return kRoundTowardZero; - } - - return kDefaultRoundingMode; +RoundingMode get_round(void) { + int64_t fpscr; + int oldRound; + + _FPU_GETCW(fpscr); + oldRound = (fpscr & FPSCR_ROUND_MASK); + + switch (oldRound) { + case _ARM_FE_TONEAREST: + return kRoundToNearestEven; + case _ARM_FE_UPWARD: + return kRoundUp; + case _ARM_FE_DOWNWARD: + return kRoundDown; + case _ARM_FE_TOWARDZERO: + return kRoundTowardZero; + } + + return kDefaultRoundingMode; } -#elif defined( __arm__ ) && defined( __GNUC__ ) -#define _ARM_FE_TONEAREST 0x0 -#define _ARM_FE_UPWARD 0x400000 -#define _ARM_FE_DOWNWARD 0x800000 -#define _ARM_FE_TOWARDZERO 0xc00000 -#define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw)) -#define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw)) -RoundingMode set_round( RoundingMode r, Type outType ) -{ - static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST, - _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO }; - static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO, - _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO }; - const int *p = int_rounds; - if( outType == kfloat || outType == kdouble ) - p = flt_rounds; - int oldRound; - _FPU_GETCW(oldRound); - _FPU_SETCW( p[r] ); - - switch( oldRound ) - { - case _ARM_FE_TONEAREST: - return kRoundToNearestEven; - case _ARM_FE_UPWARD: - return kRoundUp; - case _ARM_FE_DOWNWARD: - return kRoundDown; - case _ARM_FE_TOWARDZERO: - return kRoundTowardZero; - default: - abort(); // ??! - } - return kDefaultRoundingMode; //never happens +#elif !(defined(_WIN32) && defined(_MSC_VER)) +RoundingMode set_round(RoundingMode r, Type outType) { + static const int flt_rounds[kRoundingModeCount] = { + FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO}; + static const int int_rounds[kRoundingModeCount] = { + FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO}; + const int* p = int_rounds; + if (outType == kfloat || outType == kdouble) p = flt_rounds; + int oldRound = fegetround(); + fesetround(p[r]); + + switch (oldRound) { + case FE_TONEAREST: + return kRoundToNearestEven; + case FE_UPWARD: + return kRoundUp; + case FE_DOWNWARD: + return kRoundDown; + case FE_TOWARDZERO: + return kRoundTowardZero; + default: + abort(); // ??! + } + return kDefaultRoundingMode; // never happens } -RoundingMode get_round( void ) -{ - int oldRound; - _FPU_GETCW(oldRound); - - switch( oldRound ) - { - case _ARM_FE_TONEAREST: - return kRoundToNearestEven; - case _ARM_FE_UPWARD: - return kRoundUp; - case _ARM_FE_DOWNWARD: - return kRoundDown; - case _ARM_FE_TOWARDZERO: - return kRoundTowardZero; - } - - return kDefaultRoundingMode; +RoundingMode get_round(void) { + int oldRound = fegetround(); + + switch (oldRound) { + case FE_TONEAREST: + return kRoundToNearestEven; + case FE_UPWARD: + return kRoundUp; + case FE_DOWNWARD: + return kRoundDown; + case FE_TOWARDZERO: + return kRoundTowardZero; + } + + return kDefaultRoundingMode; } - + #else -RoundingMode set_round( RoundingMode r, Type outType ) -{ - static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; - static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; - const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds; - unsigned int oldRound; - - int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound - if (err) { -// vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__); - return kDefaultRoundingMode; //what else never happens - } - - oldRound &= _MCW_RC; - - RoundingMode old = - (oldRound == _RC_NEAR)? kRoundToNearestEven : - (oldRound == _RC_UP)? kRoundUp : - (oldRound == _RC_DOWN)? kRoundDown : - (oldRound == _RC_CHOP)? kRoundTowardZero: - kDefaultRoundingMode; - - _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode - return old; //returning old rounding mode +RoundingMode set_round(RoundingMode r, Type outType) { + static const int flt_rounds[kRoundingModeCount] = {_RC_NEAR, _RC_NEAR, _RC_UP, + _RC_DOWN, _RC_CHOP}; + static const int int_rounds[kRoundingModeCount] = {_RC_CHOP, _RC_NEAR, _RC_UP, + _RC_DOWN, _RC_CHOP}; + const int* p = + (outType == kfloat || outType == kdouble) ? flt_rounds : int_rounds; + unsigned int oldRound; + + int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound + if (err) { + vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, + __LINE__); + return kDefaultRoundingMode; // what else never happens + } + + oldRound &= _MCW_RC; + + RoundingMode old = (oldRound == _RC_NEAR) ? kRoundToNearestEven + : (oldRound == _RC_UP) ? kRoundUp + : (oldRound == _RC_DOWN) ? kRoundDown + : (oldRound == _RC_CHOP) ? kRoundTowardZero + : kDefaultRoundingMode; + + _controlfp_s(&oldRound, p[r], _MCW_RC); // setting new rounding mode + return old; // returning old rounding mode } -RoundingMode get_round( void ) -{ - unsigned int oldRound; - - int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound - oldRound &= _MCW_RC; - return - (oldRound == _RC_NEAR)? kRoundToNearestEven : - (oldRound == _RC_UP)? kRoundUp : - (oldRound == _RC_DOWN)? kRoundDown : - (oldRound == _RC_CHOP)? kRoundTowardZero: - kDefaultRoundingMode; +RoundingMode get_round(void) { + unsigned int oldRound; + + int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound + oldRound &= _MCW_RC; + return (oldRound == _RC_NEAR) ? kRoundToNearestEven + : (oldRound == _RC_UP) ? kRoundUp + : (oldRound == _RC_DOWN) ? kRoundDown + : (oldRound == _RC_CHOP) ? kRoundTowardZero + : kDefaultRoundingMode; } #endif // -// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in -// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in -// software by testing against FLT_MIN or DBL_MIN in that file. +// FlushToZero() sets the host processor into ftz mode. It is intended to have +// a remote effect on the behavior of the code in basic_test_conversions.c. Some +// host processors may not support this mode, which case you'll need to do some +// clamping in software by testing against FLT_MIN or DBL_MIN in that file. // -// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of -// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic +// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* +// have the behavior in section 7.5.3 of the OpenCL spec. They *ALWAYS* flush to +// zero for subnormal inputs or outputs when FTZ mode is on like other basic // operators do (e.g. add, subtract, multiply, divide, etc.) // -// Configuring hardware to FTZ mode varies by platform. -// CAUTION: Some C implementations may also fail to behave properly in this mode. +// Configuring hardware to FTZ mode varies by platform. +// CAUTION: Some C implementations may also fail to behave properly in this +// mode. // // On PowerPC, it is done by setting the FPSCR into non-IEEE mode. -// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2 -// is used for floating point computation! If your OS uses x87, you'll need to figure out how -// to turn that off for the conversions code in basic_test_conversions.c so that they flush to -// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c -// in which case, these function are at liberty to do nothing. +// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- +// provided that SSE/SSE2 +// is used for floating point computation! If your OS uses x87, you'll +// need to figure out how to turn that off for the conversions code in +// basic_test_conversions.c so that they flush to zero properly. +// Otherwise, you'll need to add appropriate software clamping to +// basic_test_conversions.c in which case, these function are at +// liberty to do nothing. // -#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32) - #include -#elif defined( __PPC__ ) - #include +#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32) +#include +#elif defined(__PPC__) +#include +#elif defined(__mips__) +#include "mips/m32c1.h" #endif -void *FlushToZero( void ) -{ -#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) - #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) - union{ unsigned int i; void *p; }u = { _mm_getcsr() }; - _mm_setcsr( u.i | 0x8040 ); - return u.p; - #elif defined( __arm__ ) - #define _ARM_FE_FTZ 0x1000000 - #define _ARM_FE_NFTZ 0x0 - #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw)) - - static const int ftz_modes[ kRoundingModeCount ] = { _ARM_FE_FTZ, _ARM_FE_NFTZ }; - const int *f = ftz_modes; - _FPU_SETCW( f[0] ); - return NULL; - #elif defined( __PPC__ ) - fpu_control_t flags = 0; - _FPU_GETCW(flags); - flags |= _FPU_MASK_NI; - _FPU_SETCW(flags); - return NULL; - #else - #error Unknown arch - #endif +void* FlushToZero(void) { +#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) + union { + unsigned int i; + void* p; + } u = {_mm_getcsr()}; + _mm_setcsr(u.i | 0x8040); + return u.p; +#elif defined(__arm__) || defined(__aarch64__) + int64_t fpscr; + _FPU_GETCW(fpscr); + _FPU_SETCW(fpscr | FPSCR_FZ); + return NULL; +#elif defined(__PPC__) + fpu_control_t flags = 0; + _FPU_GETCW(flags); + flags |= _FPU_MASK_NI; + _FPU_SETCW(flags); + return NULL; +#elif defined(__mips__) + fpa_bissr(FPA_CSR_FS); + return NULL; #else - #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. +#error Unknown arch +#endif +#else +#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. #endif } -// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p. -void UnFlushToZero( void *p) -{ -#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) - #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) - union{ void *p; int i; }u = { p }; - _mm_setcsr( u.i ); - #elif defined( __arm__ ) - #define _ARM_FE_FTZ 0x1000000 - #define _ARM_FE_NFTZ 0x0 - #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw)) - - static const int ftz_modes[ kRoundingModeCount ] = { _ARM_FE_FTZ, _ARM_FE_NFTZ }; - const int *f = ftz_modes; - _FPU_SETCW( f[1] ); - #elif defined( __PPC__) - fpu_control_t flags = 0; - _FPU_GETCW(flags); - flags &= ~_FPU_MASK_NI; - _FPU_SETCW(flags); - #else - #error Unknown arch - #endif +// Undo the effects of FlushToZero above, restoring the host to default +// behavior, using the information passed in p. +void UnFlushToZero(void* p) { +#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) + union { + void* p; + int i; + } u = {p}; + _mm_setcsr(u.i); +#elif defined(__arm__) || defined(__aarch64__) + int64_t fpscr; + _FPU_GETCW(fpscr); + _FPU_SETCW(fpscr & ~FPSCR_FZ); +#elif defined(__PPC__) + fpu_control_t flags = 0; + _FPU_GETCW(flags); + flags &= ~_FPU_MASK_NI; + _FPU_SETCW(flags); +#elif defined(__mips__) + fpa_bicsr(FPA_CSR_FS); +#else +#error Unknown arch +#endif #else - #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. +#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. #endif } diff --git a/oclmath/rounding_mode.h b/oclmath/rounding_mode.h index e4cf14e97..38c9e4c38 100644 --- a/oclmath/rounding_mode.h +++ b/oclmath/rounding_mode.h @@ -1,58 +1,57 @@ - -/****************************************************************** - // - // OpenCL Conformance Tests - // - // Copyright: (c) 2008-2013 by Apple Inc. All Rights Reserved. - // - ******************************************************************/ - +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// #ifndef __ROUNDING_MODE_H__ #define __ROUNDING_MODE_H__ #include "compat.h" -typedef enum -{ - kDefaultRoundingMode = 0, - kRoundToNearestEven, - kRoundUp, - kRoundDown, - kRoundTowardZero, - - kRoundingModeCount -}RoundingMode; - -typedef enum -{ - kuchar = 0, - kchar = 1, - kushort = 2, - kshort = 3, - kuint = 4, - kint = 5, - kfloat = 6, - kdouble = 7, - kulong = 8, - klong = 9, - - //This goes last - kTypeCount -}Type; - -#ifdef __cplusplus -extern "C" { -#endif - -extern RoundingMode set_round( RoundingMode r, Type outType ); -extern RoundingMode get_round( void ); -extern void *FlushToZero( void ); -extern void UnFlushToZero( void *p); - -#ifdef __cplusplus -} +#if (defined(_WIN32) && defined(_MSC_VER)) +#include "errorHelpers.h" +#include "testHarness.h" #endif - +typedef enum { + kDefaultRoundingMode = 0, + kRoundToNearestEven, + kRoundUp, + kRoundDown, + kRoundTowardZero, + + kRoundingModeCount +} RoundingMode; + +typedef enum { + kuchar = 0, + kchar = 1, + kushort = 2, + kshort = 3, + kuint = 4, + kint = 5, + kfloat = 6, + kdouble = 7, + kulong = 8, + klong = 9, + + // This goes last + kTypeCount +} Type; + +extern RoundingMode set_round(RoundingMode r, Type outType); +extern RoundingMode get_round(void); +extern void* FlushToZero(void); +extern void UnFlushToZero(void* p); #endif /* __ROUNDING_MODE_H__ */ From 8866c74b04abbc2db24a46a78efc992b762632a6 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Sun, 6 Oct 2024 19:57:35 -0700 Subject: [PATCH 02/13] Implement tests for `sycl_ext_oneapi_default_context` extension Signed-off-by: Michael Aziz --- CMakeLists.txt | 4 + .../oneapi_default_context/CMakeLists.txt | 5 ++ .../default_context.cpp | 80 +++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 tests/extension/oneapi_default_context/CMakeLists.txt create mode 100644 tests/extension/oneapi_default_context/default_context.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 971c6c10c..fa62b227b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,6 +103,10 @@ add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_ENQUEUE_FUNCTIONS_TESTS "Enable extension oneAPI enqueue_functions tests" OFF FORCE_ON ${SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS}) +add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_DEFAULT_CONTEXT_TESTS + "Enable extension oneAPI default_context tests" OFF + FORCE_ON ${SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS}) + # TODO: Deprecated - remove add_cts_option(SYCL_CTS_ENABLE_VERBOSE_LOG "Enable debug-level logs (deprecated)" OFF) diff --git a/tests/extension/oneapi_default_context/CMakeLists.txt b/tests/extension/oneapi_default_context/CMakeLists.txt new file mode 100644 index 000000000..ee005a726 --- /dev/null +++ b/tests/extension/oneapi_default_context/CMakeLists.txt @@ -0,0 +1,5 @@ +if(SYCL_CTS_ENABLE_EXT_ONEAPI_DEFAULT_CONTEXT_TESTS) + file(GLOB test_cases_list *.cpp) + + add_cts_test(${test_cases_list}) +endif() diff --git a/tests/extension/oneapi_default_context/default_context.cpp b/tests/extension/oneapi_default_context/default_context.cpp new file mode 100644 index 000000000..7f83f77bb --- /dev/null +++ b/tests/extension/oneapi_default_context/default_context.cpp @@ -0,0 +1,80 @@ +/******************************************************************************* +// +// SYCL 2020 Conformance Test Suite +// +// Copyright (c) 2024 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +*******************************************************************************/ + +#include "../../common/common.h" + +namespace default_context::tests { + +#ifdef SYCL_EXT_ONEAPI_DEFAULT_CONTEXT + +static void testDefaultContext() { + sycl::platform platform{}; + sycl::context defaultContext = platform.ext_oneapi_get_default_context(); + CHECK(defaultContext.get_devices() == platform.get_devices()); +} + +static void testQueueConstructors() { + const sycl::property_list& propList = {}; + cts_async_handler asyncHandler; + const auto& deviceSelector = sycl::default_selector_v; + sycl::device syclDevice; + sycl::context syclContext; + sycl::context defaultContext = + sycl::platform{}.ext_oneapi_get_default_context(); + + // Check that a default-constructed context is not the default context. + CHECK(syclContext != defaultContext); + + // Default context constructor + CHECK(defaultContext == sycl::queue{propList}.get_context()); + CHECK(defaultContext == sycl::queue{asyncHandler, propList}.get_context()); + CHECK(defaultContext == sycl::queue{deviceSelector, propList}.get_context()); + CHECK(defaultContext == + sycl::queue{deviceSelector, asyncHandler, propList}.get_context()); + CHECK(defaultContext == sycl::queue{syclDevice, propList}.get_context()); + CHECK(defaultContext == + sycl::queue{syclDevice, asyncHandler, propList}.get_context()); + + // Non-default context constructors + CHECK(syclContext == + sycl::queue{syclContext, deviceSelector, propList}.get_context()); + CHECK(syclContext == + sycl::queue{syclContext, deviceSelector, asyncHandler, propList} + .get_context()); + CHECK(syclContext == + sycl::queue{syclContext, syclDevice, propList}.get_context()); + CHECK(syclContext == + sycl::queue{syclContext, syclDevice, asyncHandler, propList} + .get_context()); +} + +#endif + +TEST_CASE("Test case for \"Default Context\" extension", + "[oneapi_default_context") { +#ifndef SYCL_EXT_ONEAPI_DEFAULT_CONTEXT + SKIP("SYCL_EXT_ONEAPI_DEFAULT_CONTEXT is not defined"); +#else + testDefaultContext(); + testQueueConstructors(); +#endif +} + +} // namespace default_context::tests From 313a2847efc63246770b80209f2cb7b99bd3cf6e Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Sat, 12 Oct 2024 14:08:50 -0700 Subject: [PATCH 03/13] Change tests to test KHR extension Signed-off-by: Michael Aziz --- CMakeLists.txt | 9 ++++++--- .../CMakeLists.txt | 2 +- .../default_context.cpp | 14 +++++++------- 3 files changed, 14 insertions(+), 11 deletions(-) rename tests/extension/{oneapi_default_context => khr_default_context}/CMakeLists.txt (61%) rename tests/extension/{oneapi_default_context => khr_default_context}/default_context.cpp (88%) diff --git a/CMakeLists.txt b/CMakeLists.txt index fa62b227b..f8cf5aeaf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,9 @@ add_cts_option(SYCL_CTS_ENABLE_DEPRECATED_FEATURES_TESTS add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS "Enable all extension oneAPI tests" OFF) +add_cts_option(SYCL_CTS_ENABLE_EXT_KHR_TESTS + "Enable all extension Khronos tests" OFF) + add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_PROPERTIES_TESTS "Enable extension oneAPI compile-time property list tests" OFF FORCE_ON ${SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS}) @@ -103,9 +106,9 @@ add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_ENQUEUE_FUNCTIONS_TESTS "Enable extension oneAPI enqueue_functions tests" OFF FORCE_ON ${SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS}) -add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_DEFAULT_CONTEXT_TESTS - "Enable extension oneAPI default_context tests" OFF - FORCE_ON ${SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS}) +add_cts_option(SYCL_CTS_ENABLE_EXT_KHR_DEFAULT_CONTEXT_TESTS + "Enable extension Khronos default_context tests" OFF + FORCE_ON ${SYCL_CTS_ENABLE_EXT_KHR_TESTS}) # TODO: Deprecated - remove add_cts_option(SYCL_CTS_ENABLE_VERBOSE_LOG diff --git a/tests/extension/oneapi_default_context/CMakeLists.txt b/tests/extension/khr_default_context/CMakeLists.txt similarity index 61% rename from tests/extension/oneapi_default_context/CMakeLists.txt rename to tests/extension/khr_default_context/CMakeLists.txt index ee005a726..abffcf950 100644 --- a/tests/extension/oneapi_default_context/CMakeLists.txt +++ b/tests/extension/khr_default_context/CMakeLists.txt @@ -1,4 +1,4 @@ -if(SYCL_CTS_ENABLE_EXT_ONEAPI_DEFAULT_CONTEXT_TESTS) +if(SYCL_CTS_ENABLE_EXT_KHR_DEFAULT_CONTEXT_TESTS) file(GLOB test_cases_list *.cpp) add_cts_test(${test_cases_list}) diff --git a/tests/extension/oneapi_default_context/default_context.cpp b/tests/extension/khr_default_context/default_context.cpp similarity index 88% rename from tests/extension/oneapi_default_context/default_context.cpp rename to tests/extension/khr_default_context/default_context.cpp index 7f83f77bb..4aabf2973 100644 --- a/tests/extension/oneapi_default_context/default_context.cpp +++ b/tests/extension/khr_default_context/default_context.cpp @@ -22,11 +22,11 @@ namespace default_context::tests { -#ifdef SYCL_EXT_ONEAPI_DEFAULT_CONTEXT +#ifdef SYCL_KHR_DEFAULT_CONTEXT static void testDefaultContext() { sycl::platform platform{}; - sycl::context defaultContext = platform.ext_oneapi_get_default_context(); + sycl::context defaultContext = platform.khr_get_default_context(); CHECK(defaultContext.get_devices() == platform.get_devices()); } @@ -37,12 +37,12 @@ static void testQueueConstructors() { sycl::device syclDevice; sycl::context syclContext; sycl::context defaultContext = - sycl::platform{}.ext_oneapi_get_default_context(); + sycl::platform{}.khr_get_default_context(); // Check that a default-constructed context is not the default context. CHECK(syclContext != defaultContext); - // Default context constructor + // Default context constructors CHECK(defaultContext == sycl::queue{propList}.get_context()); CHECK(defaultContext == sycl::queue{asyncHandler, propList}.get_context()); CHECK(defaultContext == sycl::queue{deviceSelector, propList}.get_context()); @@ -68,9 +68,9 @@ static void testQueueConstructors() { #endif TEST_CASE("Test case for \"Default Context\" extension", - "[oneapi_default_context") { -#ifndef SYCL_EXT_ONEAPI_DEFAULT_CONTEXT - SKIP("SYCL_EXT_ONEAPI_DEFAULT_CONTEXT is not defined"); + "[khr_default_context") { +#ifndef SYCL_KHR_DEFAULT_CONTEXT + SKIP("SYCL_KHR_DEFAULT_CONTEXT is not defined"); #else testDefaultContext(); testQueueConstructors(); From cfbd17ac08d0083a627da183434464bb3ee7a00a Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Sat, 12 Oct 2024 14:12:16 -0700 Subject: [PATCH 04/13] Run clang-format Signed-off-by: Michael Aziz --- tests/extension/khr_default_context/default_context.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/extension/khr_default_context/default_context.cpp b/tests/extension/khr_default_context/default_context.cpp index 4aabf2973..26e1fff3c 100644 --- a/tests/extension/khr_default_context/default_context.cpp +++ b/tests/extension/khr_default_context/default_context.cpp @@ -36,8 +36,7 @@ static void testQueueConstructors() { const auto& deviceSelector = sycl::default_selector_v; sycl::device syclDevice; sycl::context syclContext; - sycl::context defaultContext = - sycl::platform{}.khr_get_default_context(); + sycl::context defaultContext = sycl::platform{}.khr_get_default_context(); // Check that a default-constructed context is not the default context. CHECK(syclContext != defaultContext); From 7325477beb9d722fbb6862022c9ce43e8148d97b Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Tue, 15 Oct 2024 19:20:05 -0700 Subject: [PATCH 05/13] Remove EXT prefix and fix test tag Signed-off-by: Michael Aziz --- CMakeLists.txt | 6 +++--- tests/extension/khr_default_context/default_context.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f8cf5aeaf..8795bd677 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,7 +43,7 @@ add_cts_option(SYCL_CTS_ENABLE_DEPRECATED_FEATURES_TESTS add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS "Enable all extension oneAPI tests" OFF) -add_cts_option(SYCL_CTS_ENABLE_EXT_KHR_TESTS +add_cts_option(SYCL_CTS_ENABLE_KHR_TESTS "Enable all extension Khronos tests" OFF) add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_PROPERTIES_TESTS @@ -106,9 +106,9 @@ add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_ENQUEUE_FUNCTIONS_TESTS "Enable extension oneAPI enqueue_functions tests" OFF FORCE_ON ${SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS}) -add_cts_option(SYCL_CTS_ENABLE_EXT_KHR_DEFAULT_CONTEXT_TESTS +add_cts_option(SYCL_CTS_ENABLE_KHR_DEFAULT_CONTEXT_TESTS "Enable extension Khronos default_context tests" OFF - FORCE_ON ${SYCL_CTS_ENABLE_EXT_KHR_TESTS}) + FORCE_ON ${SYCL_CTS_ENABLE_KHR_TESTS}) # TODO: Deprecated - remove add_cts_option(SYCL_CTS_ENABLE_VERBOSE_LOG diff --git a/tests/extension/khr_default_context/default_context.cpp b/tests/extension/khr_default_context/default_context.cpp index 26e1fff3c..a3f7ed221 100644 --- a/tests/extension/khr_default_context/default_context.cpp +++ b/tests/extension/khr_default_context/default_context.cpp @@ -67,7 +67,7 @@ static void testQueueConstructors() { #endif TEST_CASE("Test case for \"Default Context\" extension", - "[khr_default_context") { + "[khr_default_context]") { #ifndef SYCL_KHR_DEFAULT_CONTEXT SKIP("SYCL_KHR_DEFAULT_CONTEXT is not defined"); #else From cffbf6e25c0e359741fd068a46918b598d93675a Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Tue, 15 Oct 2024 19:33:52 -0700 Subject: [PATCH 06/13] Remove EXT prefix Signed-off-by: Michael Aziz --- tests/extension/khr_default_context/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/extension/khr_default_context/CMakeLists.txt b/tests/extension/khr_default_context/CMakeLists.txt index abffcf950..e7d855de5 100644 --- a/tests/extension/khr_default_context/CMakeLists.txt +++ b/tests/extension/khr_default_context/CMakeLists.txt @@ -1,4 +1,4 @@ -if(SYCL_CTS_ENABLE_EXT_KHR_DEFAULT_CONTEXT_TESTS) +if(SYCL_CTS_ENABLE_KHR_DEFAULT_CONTEXT_TESTS) file(GLOB test_cases_list *.cpp) add_cts_test(${test_cases_list}) From 92579dd387ee4ccc8f9ff4ae86fa8808a21eddd8 Mon Sep 17 00:00:00 2001 From: KornevNikita Date: Tue, 15 Oct 2024 07:27:47 -0700 Subject: [PATCH 07/13] Fix accessor using unsuitable property Accessor shouldn't use buffer::property. It looks like there is no real need for `properties` to contain any property, as the test just checks all possible constructors. --- tests/accessor_legacy/accessor_constructors_buffer_utility.h | 3 +-- tests/accessor_legacy/accessor_constructors_image_utility.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/accessor_legacy/accessor_constructors_buffer_utility.h b/tests/accessor_legacy/accessor_constructors_buffer_utility.h index 07a440a3d..266d0ad24 100644 --- a/tests/accessor_legacy/accessor_constructors_buffer_utility.h +++ b/tests/accessor_legacy/accessor_constructors_buffer_utility.h @@ -170,8 +170,7 @@ class check_all_accessor_constructors_buffer { using verifier = check_accessor_constructor_buffer; auto context = util::get_cts_object::context(); - property_list properties { - sycl::property::buffer::context_bound(context)}; + property_list properties; { const auto constructorName = usesHander ? diff --git a/tests/accessor_legacy/accessor_constructors_image_utility.h b/tests/accessor_legacy/accessor_constructors_image_utility.h index 377aa92d9..72eb56546 100644 --- a/tests/accessor_legacy/accessor_constructors_image_utility.h +++ b/tests/accessor_legacy/accessor_constructors_image_utility.h @@ -90,8 +90,7 @@ class check_all_accessor_constructors_image { using verifier = check_accessor_constructor_image; auto context = util::get_cts_object::context(); - property_list properties { - sycl::property::buffer::context_bound(context)}; + property_list properties; const auto constructorName = usesHander ? "constructor(image, handler, property_list)" : From 8f39ce41233504c54a05a0752c820597d415866c Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Thu, 17 Oct 2024 05:03:57 -0700 Subject: [PATCH 08/13] Remove `SKIP` macro Signed-off-by: Michael Aziz --- .../khr_default_context/default_context.cpp | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/tests/extension/khr_default_context/default_context.cpp b/tests/extension/khr_default_context/default_context.cpp index a3f7ed221..137333e1a 100644 --- a/tests/extension/khr_default_context/default_context.cpp +++ b/tests/extension/khr_default_context/default_context.cpp @@ -22,15 +22,23 @@ namespace default_context::tests { -#ifdef SYCL_KHR_DEFAULT_CONTEXT +TEST_CASE( + "the default context extension defines the SYCL_KHR_DEFAULT_CONTEXT macro", + "[khr_default_context]") { +#ifndef SYCL_KHR_DEFAULT_CONTEXT + static_assert(false, "SYCL_KHR_DEFAULT_CONTEXT is not defined"); +#endif +} -static void testDefaultContext() { +TEST_CASE("the default context contains all of the default platform's devices", + "[khr_default_context]") { sycl::platform platform{}; sycl::context defaultContext = platform.khr_get_default_context(); CHECK(defaultContext.get_devices() == platform.get_devices()); } -static void testQueueConstructors() { +TEST_CASE("queue constructors use the default context or the context parameter", + "[khr_default_context]") { const sycl::property_list& propList = {}; cts_async_handler asyncHandler; const auto& deviceSelector = sycl::default_selector_v; @@ -64,16 +72,4 @@ static void testQueueConstructors() { .get_context()); } -#endif - -TEST_CASE("Test case for \"Default Context\" extension", - "[khr_default_context]") { -#ifndef SYCL_KHR_DEFAULT_CONTEXT - SKIP("SYCL_KHR_DEFAULT_CONTEXT is not defined"); -#else - testDefaultContext(); - testQueueConstructors(); -#endif -} - } // namespace default_context::tests From c6a9f641ae9c00ea4f2f607bccae261a4f768400 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Fri, 8 Nov 2024 11:47:07 -0800 Subject: [PATCH 09/13] Fix branch name in CI workflow and links to test plans. --- .github/workflows/cts_ci.yml | 2 +- test_plans/kernel.asciidoc | 2 +- test_plans/kernel_bundle.asciidoc | 4 ++-- test_plans/queue_constructors.asciidoc | 2 +- tests/invoke/parallel_for_simplifications.cpp | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/cts_ci.yml b/.github/workflows/cts_ci.yml index d64d0c3ca..72e9e5086 100644 --- a/.github/workflows/cts_ci.yml +++ b/.github/workflows/cts_ci.yml @@ -6,7 +6,7 @@ on: push: branches: # Run on our default base branch to prime ccache for faster CI runs in PRs. - - SYCL-2020 + - main jobs: # Pushing container images requires DockerHub credentials, provided as GitHub secrets. diff --git a/test_plans/kernel.asciidoc b/test_plans/kernel.asciidoc index dacfed202..a145f4a79 100644 --- a/test_plans/kernel.asciidoc +++ b/test_plans/kernel.asciidoc @@ -7,7 +7,7 @@ This is a test plan cover sycl::kernel functionality that is not covered by curr == Testing scope -Tests intend to cover functions from 4.11.13. The kernel class that are not covered in https://github.com/KhronosGroup/SYCL-CTS/tree/SYCL-2020/tests/kernel +Tests intend to cover functions from 4.11.13. The kernel class that are not covered in https://github.com/KhronosGroup/SYCL-CTS/tree/main/tests/kernel === Device coverage diff --git a/test_plans/kernel_bundle.asciidoc b/test_plans/kernel_bundle.asciidoc index b1dd0cb08..1723017b5 100644 --- a/test_plans/kernel_bundle.asciidoc +++ b/test_plans/kernel_bundle.asciidoc @@ -7,7 +7,7 @@ This is a test plan cover kernel_bundle functionality that is not covered by cur == Testing scope -Tests intend to cover functions from 4.11. Kernel bundles that are not covered in https://github.com/KhronosGroup/SYCL-CTS/tree/SYCL-2020/tests/kernel_bundle +Tests intend to cover functions from 4.11. Kernel bundles that are not covered in https://github.com/KhronosGroup/SYCL-CTS/tree/main/tests/kernel_bundle === Device coverage @@ -116,7 +116,7 @@ Check that result of `get_kernel_ids`: === Tests for working with specialization constants -Partially tested in https://github.com/KhronosGroup/SYCL-CTS/blob/SYCL-2020/tests/specialization_constants/specialization_constants_via_kernel_bundle.h +Partially tested in https://github.com/KhronosGroup/SYCL-CTS/blob/main/tests/spec_constants/spec_constants_via_kernel_bundle.h There are two spec constant defined: `SpecName` and `OtherSpecName`. kernel_handler::get_specialization_constant() shouldn't be used in any kernel in the application. diff --git a/test_plans/queue_constructors.asciidoc b/test_plans/queue_constructors.asciidoc index fb51040c9..2da812e7f 100644 --- a/test_plans/queue_constructors.asciidoc +++ b/test_plans/queue_constructors.asciidoc @@ -7,7 +7,7 @@ This is a test plan for the for new constructors of class `sycl::queue` describe == Testing scope -Tests intend to cover not covered constructors from 4.6.5.1. Queue interface that are not covered in https://github.com/KhronosGroup/SYCL-CTS/blob/SYCL-2020/tests/queue/queue_constructors.cpp +Tests intend to cover not covered constructors from 4.6.5.1. Queue interface that are not covered in https://github.com/KhronosGroup/SYCL-CTS/blob/main/tests/queue/queue_constructors.cpp === Device coverage diff --git a/tests/invoke/parallel_for_simplifications.cpp b/tests/invoke/parallel_for_simplifications.cpp index 3d432babb..3d2717e7d 100644 --- a/tests/invoke/parallel_for_simplifications.cpp +++ b/tests/invoke/parallel_for_simplifications.cpp @@ -21,8 +21,8 @@ // parallel_for({N}, some_kernel) // parallel_for({N1, N2}, some_kernel) // parallel_for({N1, N2, N3}, some_kernel) -// Test plan: -https://github.com/KhronosGroup/SYCL-CTS/blob/SYCL-2020/test_plans/parallel_for_simplifications.asciidoc +// +// Test plan: /test_plans/parallel_for_simplifications.asciidoc // *******************************************************************************/ From fc478be92f4caf7e437c7ab9a554f71c74377274 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 12 Nov 2024 14:59:10 +0800 Subject: [PATCH 10/13] [range_api] Fix bitwise shift rhs out-of-range In kernel test_range_kernels, range[2] is 64 and range_two[2] is 128. It is undefined behavior to use either of them as rhs of bitwise shift operator because the rhs is not smaller than the number of bits in range value type. This PR divides rhs by 4 so that it is smaller than 64. --- tests/range/range_api.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/range/range_api.cpp b/tests/range/range_api.cpp index 8f133ddb9..99aeb9426 100644 --- a/tests/range/range_api.cpp +++ b/tests/range/range_api.cpp @@ -50,6 +50,9 @@ void test_range_kernels( const sycl::range range_two_const(range_two); const sycl::range range_const(range); + sycl::range range_quarter(range / 4); + const sycl::range range_quarter_const(range_quarter); + // friend bool operator==(const T& lhs, const T& rhs) INDEX_EQ_KERNEL_TEST(==, range, range_two); @@ -62,8 +65,8 @@ void test_range_kernels( INDEX_KERNEL_TEST(*, range, range_two_const, result); INDEX_KERNEL_TEST(/, range, range_two_const, result); INDEX_KERNEL_TEST(%, range, range_two_const, result); - INDEX_KERNEL_TEST(<<, range, range_two_const, result); - INDEX_KERNEL_TEST(>>, range, range_two_const, result); + INDEX_KERNEL_TEST(<<, range, range_quarter_const, result); + INDEX_KERNEL_TEST(>>, range, range_quarter_const, result); INDEX_KERNEL_TEST(&, range, range_two_const, result); INDEX_KERNEL_TEST(|, range, range_two_const, result); INDEX_KERNEL_TEST(^, range, range_two_const, result); @@ -81,8 +84,8 @@ void test_range_kernels( DUAL_SIZE_INDEX_KERNEL_TEST(*, range, integer, result); DUAL_SIZE_INDEX_KERNEL_TEST(/, range, integer, result); DUAL_SIZE_INDEX_KERNEL_TEST(%, range, integer, result); - DUAL_SIZE_INDEX_KERNEL_TEST(<<, range, integer, result); - DUAL_SIZE_INDEX_KERNEL_TEST(>>, range, integer, result); + DUAL_SIZE_INDEX_KERNEL_TEST(<<, range_quarter, integer, result); + DUAL_SIZE_INDEX_KERNEL_TEST(>>, range_quarter, integer, result); DUAL_SIZE_INDEX_KERNEL_TEST(&, range, integer, result); DUAL_SIZE_INDEX_KERNEL_TEST(|, range, integer, result); DUAL_SIZE_INDEX_KERNEL_TEST(^, range, integer, result); @@ -99,8 +102,8 @@ void test_range_kernels( INDEX_ASSIGNMENT_TESTS(*=, *, range, range_two, result); INDEX_ASSIGNMENT_TESTS(/=, /, range, range_two, result); INDEX_ASSIGNMENT_TESTS(%=, %, range, range_two, result); - INDEX_ASSIGNMENT_TESTS(<<=, <<, range, range_two, result); - INDEX_ASSIGNMENT_TESTS(>>=, >>, range, range_two, result); + INDEX_ASSIGNMENT_TESTS(<<=, <<, range, range_quarter, result); + INDEX_ASSIGNMENT_TESTS(>>=, >>, range, range_quarter, result); INDEX_ASSIGNMENT_TESTS(&=, &, range, range_two, result); INDEX_ASSIGNMENT_TESTS(|=, |, range, range_two, result); INDEX_ASSIGNMENT_TESTS(^=, ^, range, range_two, result); @@ -111,8 +114,8 @@ void test_range_kernels( INDEX_ASSIGNMENT_INTEGER_TESTS(*=, *, range, integer, result); INDEX_ASSIGNMENT_INTEGER_TESTS(/=, /, range, integer, result); INDEX_ASSIGNMENT_INTEGER_TESTS(%=, %, range, integer, result); - INDEX_ASSIGNMENT_INTEGER_TESTS(<<=, <<, range, integer, result); - INDEX_ASSIGNMENT_INTEGER_TESTS(>>=, >>, range, integer, result); + INDEX_ASSIGNMENT_INTEGER_TESTS(<<=, <<, range_quarter, integer, result); + INDEX_ASSIGNMENT_INTEGER_TESTS(>>=, >>, range_quarter, integer, result); INDEX_ASSIGNMENT_INTEGER_TESTS(&=, &, range, integer, result); INDEX_ASSIGNMENT_INTEGER_TESTS(|=, |, range, integer, result); INDEX_ASSIGNMENT_INTEGER_TESTS(^=, ^, range, integer, result); From 7d11d59dc9edbdba1fc8422897f718786566777d Mon Sep 17 00:00:00 2001 From: Jie Zhang Date: Mon, 14 Oct 2024 15:23:51 +0800 Subject: [PATCH 11/13] Gather linker flags to prevent /link break linking stage --- cmake/FindDPCPP.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/FindDPCPP.cmake b/cmake/FindDPCPP.cmake index 3345c40c3..f9f285d0e 100644 --- a/cmake/FindDPCPP.cmake +++ b/cmake/FindDPCPP.cmake @@ -72,8 +72,8 @@ set_target_properties(DPCPP::Runtime PROPERTIES set(CMAKE_CXX_COMPILER ${DPCPP_CXX_EXECUTABLE}) # Use DPC++ compiler instead of default linker for building SYCL application -set(CMAKE_CXX_LINK_EXECUTABLE "${DPCPP_CXX_EXECUTABLE} \ - -o ") +set(CMAKE_CXX_LINK_EXECUTABLE "${DPCPP_CXX_EXECUTABLE} -o \ + ") function(add_sycl_to_target) set(options) From ae4e294056a8aa0ca568c4387a226c439bd8e6dc Mon Sep 17 00:00:00 2001 From: Wenju He Date: Wed, 13 Nov 2024 13:09:08 +0800 Subject: [PATCH 12/13] add comment in code --- tests/range/range_api.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/range/range_api.cpp b/tests/range/range_api.cpp index 99aeb9426..fc1aa019a 100644 --- a/tests/range/range_api.cpp +++ b/tests/range/range_api.cpp @@ -50,6 +50,7 @@ void test_range_kernels( const sycl::range range_two_const(range_two); const sycl::range range_const(range); + // make sure bitwise shift rhs is smaller than number of bits in size_t. sycl::range range_quarter(range / 4); const sycl::range range_quarter_const(range_quarter); From a6f63f97ffb2272248c3e58a023fc41c405ce308 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Fri, 15 Nov 2024 12:45:20 +0100 Subject: [PATCH 13/13] Bump DPCPP version to 6456fe89646deb8bf30c0eb32827a62ff6e58ffb Signed-off-by: Steffen Larsen --- .github/workflows/cts_ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cts_ci.yml b/.github/workflows/cts_ci.yml index 72e9e5086..8964ae5bf 100644 --- a/.github/workflows/cts_ci.yml +++ b/.github/workflows/cts_ci.yml @@ -56,7 +56,7 @@ jobs: matrix: include: - sycl-impl: dpcpp - version: 20a088e1231c4ac85fd74c0de79c563977d2f38c + version: 6456fe89646deb8bf30c0eb32827a62ff6e58ffb - sycl-impl: hipsycl version: 3d8b1cd steps: @@ -114,7 +114,7 @@ jobs: matrix: include: - sycl-impl: dpcpp - version: 20a088e1231c4ac85fd74c0de79c563977d2f38c + version: 6456fe89646deb8bf30c0eb32827a62ff6e58ffb - sycl-impl: hipsycl version: 3d8b1cd env: