Skip to content

Commit

Permalink
Fix AMD builds and enable testing NHWC CUDA ops in one GPU CI (#17972)
Browse files Browse the repository at this point in the history
### Description
This PR:

(1) Fixes AMD builds after #17200 broke them (Need to remember to run
AMD builds while trying to merge external CUDA PRs next time)

(2) Turns on the NHWC CUDA feature in the Linux GPU CI. The extra time
spent building a few more files and running a few more tests will not
be much.

Test Linux GPU CI run:
https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1170770

### Motivation and Context
Keeps the NHWC CUDA ops (#17200) covered by CI testing and guards
against future regressions.
  • Loading branch information
hariharans29 authored Oct 17, 2023
1 parent 6832b68 commit 9356986
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 11 deletions.
2 changes: 2 additions & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ set(provider_excluded_files
"gpu_data_transfer.h"
"integer_gemm.cc"
"tunable/*"
"cuda_nhwc_kernels.cc"
"cuda_nhwc_kernels.h"
)

set(training_ops_excluded_files
Expand Down
7 changes: 6 additions & 1 deletion onnxruntime/core/providers/rocm/miopen_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ Status MiopenTensor::CreateTensorIfNeeded() {
return Status::OK();
}

Status MiopenTensor::Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType) {
Status MiopenTensor::Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType, bool is_nhwc) {
if (is_nhwc) {
return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED,
"NHWC Tensor usage is not supported in AMD builds for now");
}

ORT_RETURN_IF_ERROR(CreateTensorIfNeeded());

int rank = gsl::narrow_cast<int>(input_dims.size());
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/rocm/miopen_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class MiopenTensor final {
~MiopenTensor();
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(MiopenTensor);

Status Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType);
Status Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType, bool is_nhwc = false);
Status Set(miopenDataType_t dataType, miopenTensorLayout_t tensor_layout, int n, int c, int h, int w);
Status Set(const MiopenTensor& x_desc, miopenBatchNormMode_t mode);

Expand Down
12 changes: 6 additions & 6 deletions onnxruntime/core/providers/rocm/nn/conv_transpose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,28 @@ namespace rocm {
T, \
kRocmExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
ConvTranspose<T>); \
ConvTranspose<T, false>); \
ONNX_OPERATOR_TYPED_KERNEL_EX( \
ConvTranspose, \
kOnnxDomain, \
11, \
T, \
kRocmExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
ConvTranspose<T>);
ConvTranspose<T, false>);

REGISTER_KERNEL_TYPED(float)
// not yet supported in MIOpen
// REGISTER_KERNEL_TYPED(double)
REGISTER_KERNEL_TYPED(MLFloat16)

template <typename T>
Status ConvTranspose<T>::ComputeInternal(OpKernelContext* context) const {
template <typename T, bool NHWC>
Status ConvTranspose<T, NHWC>::ComputeInternal(OpKernelContext* context) const {
return DoConvTranspose(context, false);
}

template <typename T>
Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
template <typename T, bool NHWC>
Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
typedef typename ToHipType<T>::MappedType HipT;

const Tensor* X = context->Input<Tensor>(0);
Expand Down
6 changes: 4 additions & 2 deletions onnxruntime/core/providers/rocm/nn/conv_transpose.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
namespace onnxruntime {
namespace rocm {

template <typename T>
template <typename T, bool NHWC>
class ConvTranspose : public RocmKernel {
public:
ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info){};
ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info) {
static_assert(!NHWC, "AMD builds don't support usage of NHWC ops");
};
Status ComputeInternal(OpKernelContext* context) const override;
Status DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const;

Expand Down
2 changes: 2 additions & 0 deletions tools/ci_build/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ def convert_arg_line_to_args(self, arg_line):
"--cudnn_home is not specified.",
)
parser.add_argument("--enable_cuda_line_info", action="store_true", help="Enable CUDA line info.")
parser.add_argument("--enable_cuda_nhwc_ops", action="store_true", help="Enable CUDA NHWC ops in build.")

# Python bindings
parser.add_argument("--enable_pybind", action="store_true", help="Enable Python Bindings.")
Expand Down Expand Up @@ -1025,6 +1026,7 @@ def generate_build_tree(
"-Donnxruntime_USE_MPI=" + ("ON" if args.use_mpi else "OFF"),
"-Donnxruntime_ENABLE_MEMORY_PROFILE=" + ("ON" if args.enable_memory_profile else "OFF"),
"-Donnxruntime_ENABLE_CUDA_LINE_NUMBER_INFO=" + ("ON" if args.enable_cuda_line_info else "OFF"),
"-Donnxruntime_USE_CUDA_NHWC_OPS=" + ("ON" if args.enable_cuda_nhwc_ops else "OFF"),
"-Donnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB=" + ("ON" if args.build_wasm_static_lib else "OFF"),
"-Donnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING="
+ ("OFF" if args.disable_wasm_exception_catching else "ON"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ jobs:
--parallel \
--build_wheel \
--enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
--enable_cuda_profiling \
--enable_cuda_profiling --enable_cuda_nhwc_ops \
--enable_pybind --build_java \
--use_cache \
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75; \
Expand Down

0 comments on commit 9356986

Please sign in to comment.