From cae70fb1d71f1f4a15427c3c91a6cbe068e7a060 Mon Sep 17 00:00:00 2001 From: Hari Seshadri Date: Mon, 16 Oct 2023 11:41:28 -0700 Subject: [PATCH 1/7] Initial commit --- tools/ci_build/build.py | 2 ++ tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index d6a5761d54042..dd39adb1c6233 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -247,6 +247,7 @@ def convert_arg_line_to_args(self, arg_line): "--cudnn_home is not specified.", ) parser.add_argument("--enable_cuda_line_info", action="store_true", help="Enable CUDA line info.") + parser.add_argument("--enable_cuda_nhwc_ops", action="store_true", help="Enable CUDA NHWC ops in build.") # Python bindings parser.add_argument("--enable_pybind", action="store_true", help="Enable Python Bindings.") @@ -1025,6 +1026,7 @@ def generate_build_tree( "-Donnxruntime_USE_MPI=" + ("ON" if args.use_mpi else "OFF"), "-Donnxruntime_ENABLE_MEMORY_PROFILE=" + ("ON" if args.enable_memory_profile else "OFF"), "-Donnxruntime_ENABLE_CUDA_LINE_NUMBER_INFO=" + ("ON" if args.enable_cuda_line_info else "OFF"), + "-Donnxruntime_USE_CUDA_NHWC_OPSs=" + ("ON" if args.enable_cuda_nhwc_ops else "OFF"), "-Donnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB=" + ("ON" if args.build_wasm_static_lib else "OFF"), "-Donnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING=" + ("OFF" if args.disable_wasm_exception_catching else "ON"), diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml index 981cbec4ef50f..71a580f348f6f 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml @@ -106,7 +106,7 @@ jobs: --parallel \ --build_wheel \ --enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \ - --enable_cuda_profiling \ + --enable_cuda_profiling --enable_cuda_nhwc_ops \ --enable_pybind --build_java \ --use_cache \ --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75; \ From 8ac2ebba5d0dbea1792120f0262438975ff6e66f Mon Sep 17 00:00:00 2001 From: Hari Seshadri Date: Mon, 16 Oct 2023 12:24:00 -0700 Subject: [PATCH 2/7] Fix typo --- tools/ci_build/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index dd39adb1c6233..806e536cb4ddb 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1026,7 +1026,7 @@ def generate_build_tree( "-Donnxruntime_USE_MPI=" + ("ON" if args.use_mpi else "OFF"), "-Donnxruntime_ENABLE_MEMORY_PROFILE=" + ("ON" if args.enable_memory_profile else "OFF"), "-Donnxruntime_ENABLE_CUDA_LINE_NUMBER_INFO=" + ("ON" if args.enable_cuda_line_info else "OFF"), - "-Donnxruntime_USE_CUDA_NHWC_OPSs=" + ("ON" if args.enable_cuda_nhwc_ops else "OFF"), + "-Donnxruntime_USE_CUDA_NHWC_OPS=" + ("ON" if args.enable_cuda_nhwc_ops else "OFF"), "-Donnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB=" + ("ON" if args.build_wasm_static_lib else "OFF"), "-Donnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING=" + ("OFF" if args.disable_wasm_exception_catching else "ON"), From 6f1f61d48bc68b391ee68b1b0451f8cd4f552e38 Mon Sep 17 00:00:00 2001 From: Hari Seshadri Date: Mon, 16 Oct 2023 15:09:52 -0700 Subject: [PATCH 3/7] Try fix AMD builds --- cmake/onnxruntime_rocm_hipify.cmake | 2 ++ onnxruntime/core/providers/rocm/miopen_common.cc | 7 ++++++- onnxruntime/core/providers/rocm/miopen_common.h | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cmake/onnxruntime_rocm_hipify.cmake b/cmake/onnxruntime_rocm_hipify.cmake index af95d0203544c..fe3e577b4fc36 100644 --- a/cmake/onnxruntime_rocm_hipify.cmake +++ b/cmake/onnxruntime_rocm_hipify.cmake @@ -168,6 +168,8 @@ set(provider_excluded_files "gpu_data_transfer.h" "integer_gemm.cc" "tunable/*" + "cuda_nhwc_kernels.cc" + "cuda_nhwc_kernels.h" ) set(training_ops_excluded_files diff --git a/onnxruntime/core/providers/rocm/miopen_common.cc b/onnxruntime/core/providers/rocm/miopen_common.cc index e77bbab2d5750..2e0297614e685 100644 --- a/onnxruntime/core/providers/rocm/miopen_common.cc +++ b/onnxruntime/core/providers/rocm/miopen_common.cc @@ -73,7 +73,12 @@ Status MiopenTensor::CreateTensorIfNeeded() { return Status::OK(); } -Status MiopenTensor::Set(gsl::span input_dims, miopenDataType_t dataType) { +Status MiopenTensor::Set(gsl::span input_dims, miopenDataType_t dataType, bool is_nhwc) { + if (is_nhwc) { + return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, + "NHWC Tensor usage is not supported in AMD builds for now"); + } + ORT_RETURN_IF_ERROR(CreateTensorIfNeeded()); int rank = gsl::narrow_cast(input_dims.size()); diff --git a/onnxruntime/core/providers/rocm/miopen_common.h b/onnxruntime/core/providers/rocm/miopen_common.h index 7d9cabb0951c6..a88c6f8b7ec90 100644 --- a/onnxruntime/core/providers/rocm/miopen_common.h +++ b/onnxruntime/core/providers/rocm/miopen_common.h @@ -33,7 +33,7 @@ class MiopenTensor final { ~MiopenTensor(); ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(MiopenTensor); - Status Set(gsl::span input_dims, miopenDataType_t dataType); + Status Set(gsl::span input_dims, miopenDataType_t dataType, bool is_nhwc=false); Status Set(miopenDataType_t dataType, miopenTensorLayout_t tensor_layout, int n, int c, int h, int w); Status Set(const MiopenTensor& x_desc, miopenBatchNormMode_t mode); From a21ea6758c60fc0aa891ac8782cc014d81c37241 Mon Sep 17 00:00:00 2001 From: Hari Seshadri Date: Mon, 16 Oct 2023 16:00:07 -0700 Subject: [PATCH 4/7] Try fix AMD builds 2 --- onnxruntime/core/providers/rocm/nn/conv_transpose.cc | 12 ++++++------ onnxruntime/core/providers/rocm/nn/conv_transpose.h | 6 ++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc index 475d26d2e306d..7447113fdf847 100644 --- a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc +++ b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc @@ -16,7 +16,7 @@ namespace rocm { T, \ kRocmExecutionProvider, \ (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType()), \ - ConvTranspose); \ + ConvTranspose); \ ONNX_OPERATOR_TYPED_KERNEL_EX( \ ConvTranspose, \ kOnnxDomain, \ @@ -24,20 +24,20 @@ namespace rocm { T, \ kRocmExecutionProvider, \ (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType()), \ - ConvTranspose); + ConvTranspose); REGISTER_KERNEL_TYPED(float) // not yet supported in MIOpen // REGISTER_KERNEL_TYPED(double) REGISTER_KERNEL_TYPED(MLFloat16) -template -Status ConvTranspose::ComputeInternal(OpKernelContext* context) const { +template +Status ConvTranspose::ComputeInternal(OpKernelContext* context) const { return DoConvTranspose(context, false); } -template -Status ConvTranspose::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const { +template +Status ConvTranspose::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const { typedef typename ToHipType::MappedType HipT; const Tensor* X = context->Input(0); diff --git a/onnxruntime/core/providers/rocm/nn/conv_transpose.h b/onnxruntime/core/providers/rocm/nn/conv_transpose.h index b4b80aeec9421..8df67bdc40b72 100644 --- a/onnxruntime/core/providers/rocm/nn/conv_transpose.h +++ b/onnxruntime/core/providers/rocm/nn/conv_transpose.h @@ -12,10 +12,12 @@ namespace onnxruntime { namespace rocm { -template +template class ConvTranspose : public RocmKernel { public: - ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info){}; + ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info){ + static_assert(!NHWC, "AMD builds don't support usage of NHWC ops"); + }; Status ComputeInternal(OpKernelContext* context) const override; Status DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const; From 51635984ef62a010a3a6f823e91e2c5e3c0299f5 Mon Sep 17 00:00:00 2001 From: Hari Seshadri Date: Mon, 16 Oct 2023 16:26:09 -0700 Subject: [PATCH 5/7] Fix Lint failure --- onnxruntime/core/providers/rocm/nn/conv_transpose.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/rocm/nn/conv_transpose.h b/onnxruntime/core/providers/rocm/nn/conv_transpose.h index 8df67bdc40b72..091c627710b36 100644 --- a/onnxruntime/core/providers/rocm/nn/conv_transpose.h +++ b/onnxruntime/core/providers/rocm/nn/conv_transpose.h @@ -15,7 +15,7 @@ namespace rocm { template class ConvTranspose : public RocmKernel { public: - ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info){ + ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info) { static_assert(!NHWC, "AMD builds don't support usage of NHWC ops"); }; Status ComputeInternal(OpKernelContext* context) const override; From bbb805d9e5e7e4813873403bfdc8dbc7ba6e6267 Mon Sep 17 00:00:00 2001 From: Hari Seshadri Date: Mon, 16 Oct 2023 16:35:43 -0700 Subject: [PATCH 6/7] Lint --- onnxruntime/core/providers/rocm/miopen_common.cc | 2 +- onnxruntime/core/providers/rocm/miopen_common.h | 2 +- onnxruntime/core/providers/rocm/nn/conv_transpose.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/providers/rocm/miopen_common.cc b/onnxruntime/core/providers/rocm/miopen_common.cc index 2e0297614e685..675d225c2672f 100644 --- a/onnxruntime/core/providers/rocm/miopen_common.cc +++ b/onnxruntime/core/providers/rocm/miopen_common.cc @@ -78,7 +78,7 @@ Status MiopenTensor::Set(gsl::span input_dims, miopenDataType_t d return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, "NHWC Tensor usage is not supported in AMD builds for now"); } - + ORT_RETURN_IF_ERROR(CreateTensorIfNeeded()); int rank = gsl::narrow_cast(input_dims.size()); diff --git a/onnxruntime/core/providers/rocm/miopen_common.h b/onnxruntime/core/providers/rocm/miopen_common.h index a88c6f8b7ec90..eb4eb745b3692 100644 --- a/onnxruntime/core/providers/rocm/miopen_common.h +++ b/onnxruntime/core/providers/rocm/miopen_common.h @@ -33,7 +33,7 @@ class MiopenTensor final { ~MiopenTensor(); ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(MiopenTensor); - Status Set(gsl::span input_dims, miopenDataType_t dataType, bool is_nhwc=false); + Status Set(gsl::span input_dims, miopenDataType_t dataType, bool is_nhwc = false); Status Set(miopenDataType_t dataType, miopenTensorLayout_t tensor_layout, int n, int c, int h, int w); Status Set(const MiopenTensor& x_desc, miopenBatchNormMode_t mode); diff --git a/onnxruntime/core/providers/rocm/nn/conv_transpose.h b/onnxruntime/core/providers/rocm/nn/conv_transpose.h index 091c627710b36..55a84cc59fe92 100644 --- a/onnxruntime/core/providers/rocm/nn/conv_transpose.h +++ b/onnxruntime/core/providers/rocm/nn/conv_transpose.h @@ -16,7 +16,7 @@ template class ConvTranspose : public RocmKernel { public: ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info) { - static_assert(!NHWC, "AMD builds don't support usage of NHWC ops"); + static_assert(!NHWC, "AMD builds don't support usage of NHWC ops"); }; Status ComputeInternal(OpKernelContext* context) const override; Status DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const; From 1efe9b8ab844f432a0c80c69ed5f6df62c72ee16 Mon Sep 17 00:00:00 2001 From: Hari Seshadri Date: Mon, 16 Oct 2023 16:44:46 -0700 Subject: [PATCH 7/7] Lint --- onnxruntime/core/providers/rocm/miopen_common.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/rocm/miopen_common.cc b/onnxruntime/core/providers/rocm/miopen_common.cc index 675d225c2672f..6b01f02ae49b5 100644 --- a/onnxruntime/core/providers/rocm/miopen_common.cc +++ b/onnxruntime/core/providers/rocm/miopen_common.cc @@ -75,7 +75,7 @@ Status MiopenTensor::CreateTensorIfNeeded() { Status MiopenTensor::Set(gsl::span input_dims, miopenDataType_t dataType, bool is_nhwc) { if (is_nhwc) { - return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, + return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, "NHWC Tensor usage is not supported in AMD builds for now"); }