Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix AMD builds and enable testing NHWC CUDA ops in one GPU CI #17972

Merged
merged 7 commits (branch names omitted in this capture)
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ set(provider_excluded_files
"gpu_data_transfer.h"
"integer_gemm.cc"
"tunable/*"
"cuda_nhwc_kernels.cc"
"cuda_nhwc_kernels.h"
)

set(training_ops_excluded_files
Expand Down
7 changes: 6 additions & 1 deletion onnxruntime/core/providers/rocm/miopen_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ Status MiopenTensor::CreateTensorIfNeeded() {
return Status::OK();
}

Status MiopenTensor::Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType) {
Status MiopenTensor::Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType, bool is_nhwc) {
if (is_nhwc) {
return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED,
"NHWC Tensor usage is not supported in AMD builds for now");
}

ORT_RETURN_IF_ERROR(CreateTensorIfNeeded());

int rank = gsl::narrow_cast<int>(input_dims.size());
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/rocm/miopen_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class MiopenTensor final {
~MiopenTensor();
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(MiopenTensor);

Status Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType);
Status Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType, bool is_nhwc = false);
Status Set(miopenDataType_t dataType, miopenTensorLayout_t tensor_layout, int n, int c, int h, int w);
Status Set(const MiopenTensor& x_desc, miopenBatchNormMode_t mode);

Expand Down
12 changes: 6 additions & 6 deletions onnxruntime/core/providers/rocm/nn/conv_transpose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,28 @@ namespace rocm {
T, \
kRocmExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
ConvTranspose<T>); \
ConvTranspose<T, false>); \
ONNX_OPERATOR_TYPED_KERNEL_EX( \
ConvTranspose, \
kOnnxDomain, \
11, \
T, \
kRocmExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
ConvTranspose<T>);
ConvTranspose<T, false>);

REGISTER_KERNEL_TYPED(float)
// not yet supported in MIOpen
// REGISTER_KERNEL_TYPED(double)
REGISTER_KERNEL_TYPED(MLFloat16)

template <typename T>
Status ConvTranspose<T>::ComputeInternal(OpKernelContext* context) const {
template <typename T, bool NHWC>
Status ConvTranspose<T, NHWC>::ComputeInternal(OpKernelContext* context) const {
return DoConvTranspose(context, false);
}

template <typename T>
Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
template <typename T, bool NHWC>
Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
typedef typename ToHipType<T>::MappedType HipT;

const Tensor* X = context->Input<Tensor>(0);
Expand Down
6 changes: 4 additions & 2 deletions onnxruntime/core/providers/rocm/nn/conv_transpose.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
namespace onnxruntime {
namespace rocm {

template <typename T>
template <typename T, bool NHWC>
class ConvTranspose : public RocmKernel {
public:
ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info){};
ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info) {

Check warning on line 18 in onnxruntime/core/providers/rocm/nn/conv_transpose.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/rocm/nn/conv_transpose.h#L18

Single-parameter constructors should be marked explicit. [runtime/explicit] [5]
Raw output
onnxruntime/core/providers/rocm/nn/conv_transpose.h:18:  Single-parameter constructors should be marked explicit.  [runtime/explicit] [5]
static_assert(!NHWC, "AMD builds don't support usage of NHWC ops");
};

Check warning on line 20 in onnxruntime/core/providers/rocm/nn/conv_transpose.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/rocm/nn/conv_transpose.h#L20

You don't need a ; after a } [readability/braces] [4]
Raw output
onnxruntime/core/providers/rocm/nn/conv_transpose.h:20:  You don't need a ; after a }  [readability/braces] [4]
Status ComputeInternal(OpKernelContext* context) const override;
Status DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const;

Expand Down
2 changes: 2 additions & 0 deletions tools/ci_build/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ def convert_arg_line_to_args(self, arg_line):
"--cudnn_home is not specified.",
)
parser.add_argument("--enable_cuda_line_info", action="store_true", help="Enable CUDA line info.")
parser.add_argument("--enable_cuda_nhwc_ops", action="store_true", help="Enable CUDA NHWC ops in build.")

# Python bindings
parser.add_argument("--enable_pybind", action="store_true", help="Enable Python Bindings.")
Expand Down Expand Up @@ -1025,6 +1026,7 @@ def generate_build_tree(
"-Donnxruntime_USE_MPI=" + ("ON" if args.use_mpi else "OFF"),
"-Donnxruntime_ENABLE_MEMORY_PROFILE=" + ("ON" if args.enable_memory_profile else "OFF"),
"-Donnxruntime_ENABLE_CUDA_LINE_NUMBER_INFO=" + ("ON" if args.enable_cuda_line_info else "OFF"),
"-Donnxruntime_USE_CUDA_NHWC_OPS=" + ("ON" if args.enable_cuda_nhwc_ops else "OFF"),
"-Donnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB=" + ("ON" if args.build_wasm_static_lib else "OFF"),
"-Donnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING="
+ ("OFF" if args.disable_wasm_exception_catching else "ON"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ jobs:
--parallel \
--build_wheel \
--enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
--enable_cuda_profiling \
--enable_cuda_profiling --enable_cuda_nhwc_ops \
--enable_pybind --build_java \
--use_cache \
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75; \
Expand Down
Loading