Skip to content

Commit

Permalink
[ROCm] redo hipify of version controlled files (#22449)
Browse files Browse the repository at this point in the history
### Description
Updates the ROCm EP opsets to match the current CUDA EP opsets. Also
enable the test CApiTest.basic_cuda_graph_with_annotation.

Note that some changes are whitespace-only. These changes were made to
improve the comparison of corresponding ROCm and CUDA EP source files
when using a side by side diff tool.

### Motivation and Context
The ROCm EP derives from the CUDA EP. Many source files are shared
between the EPs and "hipified" during the ROCm EP build, however quite a
few files within the ROCm EP are under source control after their
initial hipification. Over time these ROCm EP files get stale relative
to their CUDA EP counterparts. It becomes necessary to re-hipify these
otherwise static files in order to pick up important changes such as
opset differences.
  • Loading branch information
jeffdaily authored Oct 18, 2024
1 parent d2a5ee2 commit 5aabc53
Show file tree
Hide file tree
Showing 16 changed files with 766 additions and 734 deletions.
4 changes: 0 additions & 4 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,6 @@ set(provider_excluded_files
"cuda_execution_provider_info.h"
"cuda_execution_provider.cc"
"cuda_execution_provider.h"
"cuda_memory_check.cc"
"cuda_memory_check.h"
"cuda_fence.cc"
"cuda_fence.h"
"cuda_kernel.h"
"cuda_pch.cc"
"cuda_pch.h"
Expand Down
6 changes: 5 additions & 1 deletion include/onnxruntime/core/providers/rocm/rocm_resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,9 @@
enum RocmResource : int {
hip_stream_t = rocm_resource_offset,
miopen_handle_t,
hipblas_handle_t
hipblas_handle_t,
deferred_cpu_allocator_t,
// below are rocm ep options
device_id_t, // 10004
arena_extend_strategy_t
};
70 changes: 62 additions & 8 deletions onnxruntime/core/providers/rocm/cu_inc/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
#include <stdint.h>
#include <vector>
#include <mutex>
#include <limits>
#include <assert.h>
#include <math.h>
#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
//#include <hip/hip_bf16.h>
#include "core/providers/rocm/rocm_common.h"
#include "core/providers/rocm/shared_inc/rocm_call.h"

Expand Down Expand Up @@ -242,12 +245,63 @@ __device__ __inline__ double _Pow(double a, double b) { return pow(a, b); }
template <>
__device__ __inline__ half _Pow(half a, half b) { return half(powf((float)a, (float)b)); }

#define ISNAN_BFLOAT16(v__) static_cast<uint16_t>(*reinterpret_cast<const uint16_t*>(&v__) & ~BFloat16::kSignMask) \
> BFloat16::kPositiveInfinityBits

// Note that there is no consistent canonical NaN for FP16 and BF16;
// HIP uses 0x7FFF for HIPRT_NAN_BF16, but ONNX Runtime uses 0x7FC1.
// (see BFloat16Impl::kPositiveQNaNBits).
#define NAN_BFLOAT16 BFloat16::FromBits((uint16_t)0x7FFFU)

template <typename T>
__device__ __inline__ T _Min(T a, T b) { return a < b ? a : b; }

template <>
__device__ __inline__ float _Min(float a, float b) {
return (isnan(a) || isnan(b)) ? std::numeric_limits<float>::quiet_NaN() : ( a < b ? a : b );
}

template <>
__device__ __inline__ double _Min(double a, double b) {
return (isnan(a) || isnan(b)) ? std::numeric_limits<double>::quiet_NaN() : ( a < b ? a : b );
}

template <>
__device__ __inline__ half _Min(half a, half b) {
return __hmin_nan(a, b);
}

template <>
__device__ __inline__ BFloat16 _Min(BFloat16 a, BFloat16 b) {
return (ISNAN_BFLOAT16(a) || ISNAN_BFLOAT16(b)) ? NAN_BFLOAT16 : (a < b ? a : b);
}

template <typename T>
__device__ __inline__ T _Max(T a, T b) { return a > b ? a : b; }

template <>
__device__ __inline__ float _Max(float a, float b) {
return (isnan(a) || isnan(b)) ? std::numeric_limits<float>::quiet_NaN() : ( a > b ? a : b );
}

template <>
__device__ __inline__ double _Max(double a, double b) {
return (isnan(a) || isnan(b)) ? std::numeric_limits<double>::quiet_NaN() : ( a > b ? a : b );
}

template <>
__device__ __inline__ half _Max(half a, half b) {
return __hmax_nan(a, b);
}

template <>
__device__ __inline__ BFloat16 _Max(BFloat16 a, BFloat16 b) {
return (ISNAN_BFLOAT16(a) || ISNAN_BFLOAT16(b)) ? NAN_BFLOAT16 : (a > b ? a : b);
}

#undef ISNAN_BFLOAT16
#undef NAN_BFLOAT16

template <typename T>
__device__ __inline__ T _Abs(T a) { return a > (T)0 ? a : -a; }

Expand Down Expand Up @@ -443,44 +497,44 @@ struct _IsNan {
template <>
struct _IsNan<half> {
__device__ __inline__ bool operator()(half a) const {
return static_cast<uint16_t>(*reinterpret_cast<const uint16_t*>(&a) & ~MLFloat16::kSignMask)
> MLFloat16::kPositiveInfinityBits;
return static_cast<uint16_t>(*reinterpret_cast<const uint16_t*>(&a) & ~MLFloat16::kSignMask)
> MLFloat16::kPositiveInfinityBits;
}
};

template <>
struct _IsNan<BFloat16> {
__device__ __inline__ bool operator()(BFloat16 a) const {
return static_cast<uint16_t>(*reinterpret_cast<const uint16_t*>(&a) & ~BFloat16::kSignMask)
> BFloat16::kPositiveInfinityBits;
return static_cast<uint16_t>(*reinterpret_cast<const uint16_t*>(&a) & ~BFloat16::kSignMask)
> BFloat16::kPositiveInfinityBits;
}
};

#if !defined(DISABLE_FLOAT8_TYPES)

template <>
template<>
struct _IsNan<Float8E4M3FN> {
__device__ __inline__ bool operator()(Float8E4M3FN a) const {
return (*reinterpret_cast<const uint8_t*>(&a) & 0x7f) == 0x7f;
}
};

template <>
template<>
struct _IsNan<Float8E4M3FNUZ> {
__device__ __inline__ bool operator()(Float8E4M3FNUZ a) const {
return *reinterpret_cast<const uint8_t*>(&a) == 0x80;
}
};

template <>
template<>
struct _IsNan<Float8E5M2> {
__device__ __inline__ bool operator()(Float8E5M2 a) const {
uint8_t c = *reinterpret_cast<const uint8_t*>(&a);
return ((c & 0x7c) == 0x7c) && ((c & 0x03) != 0x00);
}
};

template <>
template<>
struct _IsNan<Float8E5M2FNUZ> {
__device__ __inline__ bool operator()(Float8E5M2FNUZ a) const {
return *reinterpret_cast<const uint8_t*>(&a) == 0x80;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include "hip/hip_runtime.h"
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

Expand Down
Loading

0 comments on commit 5aabc53

Please sign in to comment.