diff --git a/include/common/core/common.hpp b/include/common/core/common.hpp index 7f8358f85..f7a5534f2 100644 --- a/include/common/core/common.hpp +++ b/include/common/core/common.hpp @@ -228,8 +228,13 @@ enum class atomic_op : uint8_t { /// xetla dpas argument typ enum class argument_type : uint8_t { Invalid = 0, +#if __INTEL_LLVM_COMPILER >= 20240200 U1 __SYCL_DEPRECATED("u1 is reserved/unsupported") = 1, // unsigned 1 bit S1 __SYCL_DEPRECATED("s1 is reserved/unsupported") = 2, // signed 1 bit +#else + U1 = 1, // unsigned 1 bit + S1 = 2, // signed 1 bit +#endif U2 = 3, // unsigned 2 bits S2 = 4, // signed 2 bits U4 = 5, // unsigned 4 bits diff --git a/include/common/core/memory.hpp b/include/common/core/memory.hpp index 1f8beb439..ba6ebb09b 100644 --- a/include/common/core/memory.hpp +++ b/include/common/core/memory.hpp @@ -303,10 +303,21 @@ __XETLA_API void xetla_prefetch_global( T* p, xetla_vector byte_offsets, xetla_mask mask = 1) { +#if __INTEL_LLVM_COMPILER >= 20240200 __ESIMD_NS::properties props{ __ESIMD_NS::cache_hint_L1, __ESIMD_NS::cache_hint_L2}; __ESIMD_NS::prefetch(p, byte_offsets, mask, props); +#else + constexpr data_size DS = data_size::default_size; + __ESIMD_ENS::lsc_prefetch< + T, + VS, + gpu::xetla::detail::get_data_size(DS), + gpu::xetla::detail::get_cache_hint(L1H), + gpu::xetla::detail::get_cache_hint(L2H), + N / VS>(p, byte_offsets, mask); +#endif } /// template -__XETLA_API void xetla_prefetch_global(T* p, uint64_t offset = 0) { +__XETLA_API void xetla_prefetch_global(T* p, uint64_t byte_offset = 0) { +#if __INTEL_LLVM_COMPILER >= 20240200 __ESIMD_NS::properties props{ __ESIMD_NS::cache_hint_L1, __ESIMD_NS::cache_hint_L2}; - __ESIMD_NS::prefetch(p, offset, props); + __ESIMD_NS::prefetch(p, byte_offset, props); +#else + constexpr data_size DS = data_size::default_size; + __ESIMD_ENS::lsc_prefetch< + T, + VS, + gpu::xetla::detail::get_data_size(DS), + gpu::xetla::detail::get_cache_hint(L1H), + gpu::xetla::detail::get_cache_hint(L2H), + 1>(p, (byte_offset / sizeof(T))); +#endif } /// simd block_load(const T* ptr, size_t byte_offset, @@ -523,12 +545,22 @@ __XETLA_API xetla_vector xetla_load_global( T* p, xetla_vector byte_offsets, xetla_mask mask = 1) { +#if __INTEL_LLVM_COMPILER >= 20240200 __ESIMD_NS::properties props{ __ESIMD_NS::cache_hint_L1, __ESIMD_NS::cache_hint_L2, __ESIMD_NS::alignment}; - return __ESIMD_NS::gather(p, byte_offsets, mask, props); +#else + constexpr data_size DS = data_size::default_size; + return __ESIMD_ENS::lsc_gather< + T, + VS, + gpu::xetla::detail::get_data_size(DS), + gpu::xetla::detail::get_cache_hint(L1H), + gpu::xetla::detail::get_cache_hint(L2H), + N / VS>(p, byte_offsets, mask); +#endif } /// template byte_offsets, xetla_vector vals, xetla_mask mask = 1) { +#if __INTEL_LLVM_COMPILER >= 20240200 __ESIMD_NS::properties props{ __ESIMD_NS::cache_hint_L1, __ESIMD_NS::cache_hint_L2, __ESIMD_NS::alignment}; __ESIMD_NS::scatter(p, byte_offsets, vals, mask, props); +#else + constexpr data_size DS = data_size::default_size; + __ESIMD_ENS::lsc_scatter< + T, + VS, + gpu::xetla::detail::get_data_size(DS), + gpu::xetla::detail::get_cache_hint(L1H), + gpu::xetla::detail::get_cache_hint(L2H), + N / VS>((T*)p, byte_offsets, vals, mask); +#endif } /// void block_store(T* ptr, size_t byte_offset, // (usm-bs-2)