Skip to content

Commit

Permalink
Merge branch 'main' into steffen/increase_fragment_size
Browse files Browse the repository at this point in the history
  • Loading branch information
steffenlarsen authored Nov 18, 2024
2 parents f91640a + d5adc54 commit ab795cd
Show file tree
Hide file tree
Showing 15 changed files with 493 additions and 339 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/cts_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:
push:
branches:
# Run on our default base branch to prime ccache for faster CI runs in PRs.
- SYCL-2020
- main

jobs:
# Pushing container images requires DockerHub credentials, provided as GitHub secrets.
Expand Down Expand Up @@ -56,7 +56,7 @@ jobs:
matrix:
include:
- sycl-impl: dpcpp
version: 20a088e1231c4ac85fd74c0de79c563977d2f38c
version: 6456fe89646deb8bf30c0eb32827a62ff6e58ffb
- sycl-impl: hipsycl
version: 3d8b1cd
steps:
Expand Down Expand Up @@ -114,7 +114,7 @@ jobs:
matrix:
include:
- sycl-impl: dpcpp
version: 20a088e1231c4ac85fd74c0de79c563977d2f38c
version: 6456fe89646deb8bf30c0eb32827a62ff6e58ffb
- sycl-impl: hipsycl
version: 3d8b1cd
env:
Expand Down
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ add_cts_option(SYCL_CTS_ENABLE_DEPRECATED_FEATURES_TESTS
add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS
"Enable all extension oneAPI tests" OFF)

add_cts_option(SYCL_CTS_ENABLE_KHR_TESTS
"Enable all extension Khronos tests" OFF)

add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_PROPERTIES_TESTS
"Enable extension oneAPI compile-time property list tests" OFF
FORCE_ON ${SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS})
Expand Down Expand Up @@ -103,6 +106,10 @@ add_cts_option(SYCL_CTS_ENABLE_EXT_ONEAPI_ENQUEUE_FUNCTIONS_TESTS
"Enable extension oneAPI enqueue_functions tests" OFF
FORCE_ON ${SYCL_CTS_ENABLE_EXT_ONEAPI_TESTS})

add_cts_option(SYCL_CTS_ENABLE_KHR_DEFAULT_CONTEXT_TESTS
"Enable extension Khronos default_context tests" OFF
FORCE_ON ${SYCL_CTS_ENABLE_KHR_TESTS})

# TODO: Deprecated - remove
add_cts_option(SYCL_CTS_ENABLE_VERBOSE_LOG
"Enable debug-level logs (deprecated)" OFF)
Expand Down
4 changes: 2 additions & 2 deletions cmake/FindDPCPP.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ set_target_properties(DPCPP::Runtime PROPERTIES

set(CMAKE_CXX_COMPILER ${DPCPP_CXX_EXECUTABLE})
# Use DPC++ compiler instead of default linker for building SYCL application
set(CMAKE_CXX_LINK_EXECUTABLE "${DPCPP_CXX_EXECUTABLE} <FLAGS> \
<CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
set(CMAKE_CXX_LINK_EXECUTABLE "${DPCPP_CXX_EXECUTABLE} <FLAGS> <OBJECTS> -o <TARGET> \
<CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <LINK_LIBRARIES>")

function(add_sycl_to_target)
set(options)
Expand Down
172 changes: 111 additions & 61 deletions oclmath/fpcontrol.h
Original file line number Diff line number Diff line change
@@ -1,75 +1,125 @@

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _fpcontrol_h
#define _fpcontrol_h

// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
// in integer code, and have found this is the only way to correctly verify operation.
#include <cstdint>

// In order to get tests for correctly rounded operations (e.g. multiply) to
// work properly we need to be able to set the reference hardware to FTZ mode if
// the device hardware is running in that mode. We have explored all other
// options short of writing correctly rounded operations in integer code, and
// have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implementation for these features. If the reference hardware and device are both
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in a non-default
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
typedef int FPU_mode_type;
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
#include <xmmintrin.h>
#elif defined( __PPC__ )
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#endif
// Set the reference hardware floating point unit to FTZ mode
static inline void ForceFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr( *mode | 0x8040);
#elif defined( __PPC__ )
*mode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined ( __arm__ )
unsigned fpscr;
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
// Non-Apple implementations will need to provide their own implementation for
// these features. If the reference hardware and device are both running in the
// same state (either FTZ or IEEE compliant modes) then these functions may be
// empty. If the device is running in a non-default rounding mode (e.g. round
// toward zero), then these functions should also set the reference device into
// that rounding mode.
#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) || \
defined(__MINGW32__)
#ifdef _MSC_VER
typedef int FPU_mode_type;
#else
#error ForceFTZ needs an implentation
typedef int64_t FPU_mode_type;
#endif
}

// Disable the denorm flush to zero
static inline void DisableFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr( *mode & ~0x8040);
#elif defined( __PPC__ )
*mode = fpu_control;
fpu_control &= ~_FPU_MASK_NI;
#elif defined ( __arm__ )
unsigned fpscr;
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \
defined(__MINGW32__)
#include <xmmintrin.h>
#elif defined(__PPC__)
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#elif defined(__mips__)
#include "mips/m32c1.h"
#endif
// Set the reference hardware floating point unit to FTZ mode.
//
// On every branch except MIPS, the floating-point control state that was in
// effect before the change is stored in *mode so that it can later be handed
// to RestoreFPState(). `mode` must be a valid, non-null pointer; it is
// dereferenced unconditionally on those branches.
//
// The implementation is selected at preprocessing time by target
// architecture; an unsupported target is rejected with an #error.
inline void ForceFTZ(FPU_mode_type* mode) {
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \
defined(__MINGW32__)
// x86 / MSVC / MinGW: save the SSE control/status register, then set the
// bits in 0x8040.
// NOTE(review): 0x8040 is presumably MXCSR FTZ (bit 15) | DAZ (bit 6) --
// confirm against the Intel SDM.
*mode = _mm_getcsr();
_mm_setcsr(*mode | 0x8040);
#elif defined(__PPC__)
// PowerPC: save the thread-local fpu_control word, then set _FPU_MASK_NI.
*mode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined(__arm__)
// 32-bit ARM: read FPSCR, save it, then write it back with bit 24 set.
// NOTE(review): bit 24 is presumably the FZ (flush-to-zero) bit -- confirm
// against the Arm Architecture Reference Manual.
unsigned fpscr;
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
// AArch64: same read/save/set-bit-24 sequence, using the FPCR system register
// and a 64-bit temporary.
#elif defined(__aarch64__)
uint64_t fpscr;
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
#elif defined(__mips__)
// MIPS: set FPA_CSR_FS via the helper from "mips/m32c1.h". *mode is NOT
// written on this branch, so the caller's value is left untouched.
fpa_bissr(FPA_CSR_FS);
#else
#error ForceFTZ needs an implentation
#endif
}

// Disable the denorm flush to zero on the reference hardware.
//
// On every branch except MIPS, the floating-point control state that was in
// effect before the change is stored in *mode so that it can later be handed
// to RestoreFPState(). `mode` must be a valid, non-null pointer; it is
// dereferenced unconditionally on those branches.
//
// The implementation is selected at preprocessing time by target
// architecture; an unsupported target is rejected with an #error.
inline void DisableFTZ(FPU_mode_type* mode) {
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \
    defined(__MINGW32__)
  // x86 / MSVC / MinGW: save the SSE control/status register, then clear the
  // bits in 0x8040 (the same bits ForceFTZ sets).
  *mode = _mm_getcsr();
  _mm_setcsr(*mode & ~0x8040);
#elif defined(__PPC__)
  // PowerPC: save the thread-local fpu_control word, then clear _FPU_MASK_NI.
  *mode = fpu_control;
  fpu_control &= ~_FPU_MASK_NI;
#elif defined(__arm__)
  // 32-bit ARM: read FPSCR, save it, then write it back with bit 24 cleared.
  unsigned fpscr;
  __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
  *mode = fpscr;
  __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
#elif defined(__aarch64__)
  // AArch64: same read/save/clear-bit-24 sequence, using the 64-bit FPCR
  // system register.
  uint64_t fpscr;
  __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
  *mode = fpscr;
  // The mask must be built in 64 bits: ~(1U << 24) is a 32-bit value that is
  // zero-extended when combined with the uint64_t register image, which would
  // clear bits 63:32 of the value written back instead of preserving them.
  __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(uint64_t(1) << 24)));
#elif defined(__mips__)
  // MIPS: clear FPA_CSR_FS via the helper from "mips/m32c1.h". *mode is NOT
  // written on this branch, so the caller's value is left untouched.
  fpa_bicsr(FPA_CSR_FS);
#else
#error DisableFTZ needs an implentation
#endif
}
#endif
}

// Restore the reference hardware to floating point state indicated by *mode
static inline void RestoreFPState( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
_mm_setcsr( *mode );
#elif defined( __PPC__)
fpu_control = *mode;
#elif defined (__arm__)
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
// Restore the reference hardware to floating point state indicated by *mode
inline void RestoreFPState(FPU_mode_type* mode) {
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \
defined(__MINGW32__)
_mm_setcsr(*mode);
#elif defined(__PPC__)
fpu_control = *mode;
#elif defined(__arm__)
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
// Add 64 bit support
#elif defined(__aarch64__)
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
#elif defined(__mips__)
// Mips runs by default with DAZ=1 FTZ=1
#else
#error RestoreFPState needs an implementation
#error RestoreFPState needs an implementation
#endif
}
}
#else
#error ForceFTZ and RestoreFPState need implentations
#error ForceFTZ and RestoreFPState need implentations
#endif

#endif
#endif
Loading

0 comments on commit ab795cd

Please sign in to comment.