Skip to content

Commit

Permalink
cpplint changes
Browse files Browse the repository at this point in the history
  • Loading branch information
gedoensmax committed Oct 12, 2023
1 parent b2402a9 commit d0daf4e
Show file tree
Hide file tree
Showing 12 changed files with 67 additions and 98 deletions.
12 changes: 7 additions & 5 deletions onnxruntime/core/providers/cpu/nn/instance_norm_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
#include "core/framework/tensor.h"
#endif
#include <sstream>
#include <utility>

namespace onnxruntime {

class InstanceNormHelper {
public:
static common::Status ValidateInputs(const Tensor* input, const Tensor* scale, const Tensor* B, bool is_nhwc = false) {
static common::Status ValidateInputs(const Tensor* input, const Tensor* scale, const Tensor* B,
bool is_nhwc = false) {
const auto rank = input->Shape().NumDimensions();
if (rank < 3) {
std::ostringstream ostr;
Expand All @@ -31,8 +33,8 @@ class InstanceNormHelper {

if (scale->Shape().Size() != in_channels) {
std::ostringstream ostr;
ostr << "Mismatch between input data and scale: size of scale != input channel count "
<< scale->Shape().Size() << " vs. " << in_channels << "nhwc: " << is_nhwc;
ostr << "Mismatch between input data and scale: size of scale != input channel count " << scale->Shape().Size()
<< " vs. " << input->Shape().GetDims()[1];
return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
}

Expand All @@ -44,8 +46,8 @@ class InstanceNormHelper {

if (B->Shape().Size() != in_channels) {
std::ostringstream ostr;
ostr << "Mismatch between input data and B: size of B != input channel count "
<< B->Shape().Size() << " vs. " << input->Shape().GetDims()[1];
ostr << "Mismatch between input data and B: size of B != input channel count " << B->Shape().Size() << " vs. "
<< input->Shape().GetDims()[1];
return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
}

Expand Down
8 changes: 4 additions & 4 deletions onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

#ifdef ENABLE_CUDA_NHWC_OPS

#include <utility>

#include "core/providers/shared_library/provider_api.h"
#include "core/providers/cuda/cuda_fwd.h"

#include "core/providers/cuda/cuda_nhwc_kernels.h"

namespace onnxruntime {
namespace cuda {
namespace onnxruntime::cuda {

// When adding new supported NHWC operations make sure to also integrate them into: ConvertNodeLayout
// in onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc
Expand Down Expand Up @@ -164,6 +165,5 @@ Status RegisterCudaNhwcKernels(KernelRegistry& kernel_registry) {
}
return Status::OK();
}
} // namespace cuda
} // namespace onnxruntime
} // namespace onnxruntime::cuda
#endif
10 changes: 3 additions & 7 deletions onnxruntime/core/providers/cuda/cuda_nhwc_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,8 @@

#include "core/common/status.h"

using namespace onnxruntime::common;
namespace onnxruntime::cuda {

namespace onnxruntime {
namespace cuda {
onnxruntime::common::Status RegisterCudaNhwcKernels(onnxruntime::KernelRegistry& kernel_registry);

Status RegisterCudaNhwcKernels(KernelRegistry& kernel_registry);

} // namespace cuda
} // namespace onnxruntime
} // namespace onnxruntime::cuda
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/cuda/cudnn_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// Copyright (c) 2023 NVIDIA Corporation.
// Licensed under the MIT License.

#include "cudnn_common.h"
#include <utility>

#include "core/providers/cuda/cudnn_common.h"
#include "core/common/inlined_containers.h"
#include "core/common/gsl.h"
#include "shared_inc/cuda_call.h"
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/cuda/nn/conv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// Copyright (c) 2023 NVIDIA Corporation.
// Licensed under the MIT License.

#include <utility>

#include "core/providers/cuda/nn/conv.h"
#include "core/common/span_utils.h"
#include "core/providers/cuda/cuda_common.h"
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/cuda/nn/conv.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@

#pragma once

#include <list>
#include <memory>

#include "core/platform/ort_mutex.h"
#include "core/providers/cuda/cuda_kernel.h"
#include "core/providers/cuda/cudnn_common.h"
#include "core/providers/cpu/nn/conv_attributes.h"
#include <list>

namespace onnxruntime {

Expand Down
112 changes: 35 additions & 77 deletions onnxruntime/core/providers/cuda/nn/conv_transpose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// Copyright (c) 2023 NVIDIA Corporation.
// Licensed under the MIT License.

#include <utility>

#include "conv_transpose.h"
#include "core/providers/cuda/tensor/transpose.h"

Expand All @@ -19,23 +21,13 @@ namespace cuda {

// Op Set 11 for ConvTranspose only update document to clarify default dilations and strides value.
// which are already covered by op set 11 cpu version, so simply add declaration.
#define REGISTER_KERNEL_TYPED(T, DOMAIN, NHWC) \
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX( \
ConvTranspose, \
DOMAIN, \
1, 10, \
T, \
kCudaExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
ConvTranspose<T, NHWC>); \
ONNX_OPERATOR_TYPED_KERNEL_EX( \
ConvTranspose, \
DOMAIN, \
11, \
T, \
kCudaExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
ConvTranspose<T, NHWC>);
#define REGISTER_KERNEL_TYPED(T, DOMAIN, NHWC) \
ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX( \
ConvTranspose, DOMAIN, 1, 10, T, kCudaExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), ConvTranspose<T, NHWC>); \
ONNX_OPERATOR_TYPED_KERNEL_EX(ConvTranspose, DOMAIN, 11, T, kCudaExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
ConvTranspose<T, NHWC>);

REGISTER_KERNEL_TYPED(float, kOnnxDomain, false)
REGISTER_KERNEL_TYPED(double, kOnnxDomain, false)
Expand All @@ -52,8 +44,8 @@ Status ConvTranspose<T, NHWC>::ComputeInternal(OpKernelContext* context) const {
}

template <typename T, bool NHWC>
Status ConvTranspose<T, NHWC>::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
bool& is_packed, [[maybe_unused]] PrePackedWeights* prepacked_weights) {
Status ConvTranspose<T, NHWC>::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc, bool& is_packed,
[[maybe_unused]] PrePackedWeights* prepacked_weights) {
is_packed = false;
// only layout of weight input is adjusted via PrePack
if (NHWC) { // InputTensors::IN_W
Expand All @@ -63,15 +55,10 @@ Status ConvTranspose<T, NHWC>::PrePack(const Tensor& tensor, int input_idx, Allo

InlinedVector<size_t> perm{0, 2, 3, 1};
gsl::span<size_t> permutation(perm.data(), 4);
TensorShapeVector new_dims{orig_shape[0],
orig_shape[2],
orig_shape[3],
orig_shape[1]};
TensorShapeVector new_dims{orig_shape[0], orig_shape[2], orig_shape[3], orig_shape[1]};
W_ = Tensor::Create(tensor.DataType(), TensorShape(new_dims), std::move(alloc));

auto status = cuda::Transpose::DoTranspose(GetDeviceProp(),
DefaultCudaStream(),
DefaultCublasHandle(),
auto status = cuda::Transpose::DoTranspose(GetDeviceProp(), DefaultCudaStream(), DefaultCublasHandle(),
permutation, tensor, *W_);

if (!status.IsOK()) {
Expand Down Expand Up @@ -126,16 +113,16 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
bool input_dims_changed = (s_.last_x_dims.AsShapeVector() != x_dims);
bool w_dims_changed = (s_.last_w_dims.AsShapeVector() != w_dims);
if (input_dims_changed || w_dims_changed) {
if (input_dims_changed)
s_.last_x_dims = gsl::make_span(x_dims);
if (input_dims_changed) s_.last_x_dims = gsl::make_span(x_dims);

if (w_dims_changed) {
s_.last_w_dims = gsl::make_span(w_dims);
s_.cached_benchmark_results.clear();
}

ConvTransposeAttributes::Prepare p;
ORT_RETURN_IF_ERROR(conv_transpose_attrs_.PrepareForCompute(context, has_bias, p, dynamic_padding, &w_shape, NHWC));
ORT_RETURN_IF_ERROR(
conv_transpose_attrs_.PrepareForCompute(context, has_bias, p, dynamic_padding, &w_shape, NHWC));

auto y_dims = p.Y->Shape().AsShapeVector();
if (x_dimensions == 3) {
Expand All @@ -150,12 +137,9 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy

if (w_dims_changed) {
if (NHWC) {
ORT_RETURN_IF_ERROR(s_.w_desc.Set(CUDNN_TENSOR_NHWC,
CudnnTensor::GetDataType<CudaT>(),
static_cast<int>(w_dims[0]),
static_cast<int>(w_dims[3]),
static_cast<int>(w_dims[1]),
static_cast<int>(w_dims[2])));
ORT_RETURN_IF_ERROR(s_.w_desc.Set(CUDNN_TENSOR_NHWC, CudnnTensor::GetDataType<CudaT>(),
static_cast<int>(w_dims[0]), static_cast<int>(w_dims[3]),
static_cast<int>(w_dims[1]), static_cast<int>(w_dims[2])));
} else {
ORT_RETURN_IF_ERROR(s_.w_desc.Set(w_dims, CudnnTensor::GetDataType<CudaT>()));
}
Expand All @@ -170,41 +154,37 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
}
if (NHWC) {
ORT_RETURN_IF_ERROR(s_.x_tensor.Set(CUDNN_TENSOR_NHWC, CudnnTensor::GetDataType<CudaT>(),
static_cast<int>(x_dims[0]),
static_cast<int>(x_dims[3]),
static_cast<int>(x_dims[1]),
static_cast<int>(x_dims[2])));
static_cast<int>(x_dims[0]), static_cast<int>(x_dims[3]),
static_cast<int>(x_dims[1]), static_cast<int>(x_dims[2])));
ORT_RETURN_IF_ERROR(s_.y_tensor.Set(CUDNN_TENSOR_NHWC, CudnnTensor::GetDataType<CudaT>(),
static_cast<int>(y_dims[0]),
static_cast<int>(y_dims[3]),
static_cast<int>(y_dims[1]),
static_cast<int>(y_dims[2])));
static_cast<int>(y_dims[0]), static_cast<int>(y_dims[3]),
static_cast<int>(y_dims[1]), static_cast<int>(y_dims[2])));
} else {
ORT_RETURN_IF_ERROR(s_.x_tensor.Set(x_dims, CudnnTensor::GetDataType<CudaT>()));
ORT_RETURN_IF_ERROR(s_.y_tensor.Set(y_dims, CudnnTensor::GetDataType<CudaT>()));
}

cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
ORT_RETURN_IF_ERROR(s_.conv_desc.Set(p.kernel_shape.size(), p.pads, p.strides, p.dilations,
gsl::narrow_cast<int>(conv_transpose_attrs_.group),
mode, CudnnTensor::GetDataType<CudaT>()));
gsl::narrow_cast<int>(conv_transpose_attrs_.group), mode,
CudnnTensor::GetDataType<CudaT>()));

if (has_bias) {
const auto& b_shape = p.B->Shape();
ORT_RETURN_IF_NOT(b_shape.NumDimensions() == 1, "bias should be 1D");
TensorShapeVector b_dims(2 + p.kernel_shape.size());
b_dims[0] = 1; // N
b_dims[NHWC ? 3 : 1] = b_shape[0]; // C
for (size_t i = 0; i < p.kernel_shape.size(); i++)
b_dims[(NHWC ? 1 : 2) + i] = 1;
for (size_t i = 0; i < p.kernel_shape.size(); i++) b_dims[(NHWC ? 1 : 2) + i] = 1;

ORT_RETURN_IF_ERROR(s_.b_tensor.Set(b_dims, CudnnTensor::GetDataType<CudaT>(), NHWC));
}

y_data = reinterpret_cast<CudaT*>(p.Y->MutableData<T>());

if (!s_.cached_benchmark_results.contains(x_dims)) {
IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
IAllocatorUniquePtr<void> algo_search_workspace =
GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());

// set math type to tensor core before algorithm search
if constexpr (std::is_same<T, MLFloat16>::value)
Expand All @@ -213,19 +193,8 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
cudnnConvolutionBwdDataAlgoPerf_t perf;
int algo_count = 1;
CUDNN_RETURN_IF_ERROR(cudnnFindConvolutionBackwardDataAlgorithmEx(
GetCudnnHandle(context),
s_.w_desc,
w_data,
s_.x_tensor,
x_data,
s_.conv_desc,
s_.y_tensor,
y_data,
1,
&algo_count,
&perf,
algo_search_workspace.get(),
AlgoSearchWorkspaceSize));
GetCudnnHandle(context), s_.w_desc, w_data, s_.x_tensor, x_data, s_.conv_desc, s_.y_tensor, y_data, 1,
&algo_count, &perf, algo_search_workspace.get(), AlgoSearchWorkspaceSize));
s_.cached_benchmark_results.insert(x_dims, {perf.algo, perf.memory, perf.mathType});
}

Expand Down Expand Up @@ -256,26 +225,15 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy

IAllocatorUniquePtr<void> workspace = GetScratchBuffer<void>(s_.workspace_bytes, context->GetComputeStream());

CUDNN_RETURN_IF_ERROR(
cudnnConvolutionBackwardData(
GetCudnnHandle(context),
&alpha,
s_.w_desc,
w_data,
s_.x_tensor,
x_data,
s_.conv_desc,
s_.algo,
workspace.get(),
s_.workspace_bytes,
&beta,
s_.y_tensor,
y_data));
CUDNN_RETURN_IF_ERROR(cudnnConvolutionBackwardData(GetCudnnHandle(context), &alpha, s_.w_desc, w_data, s_.x_tensor,
x_data, s_.conv_desc, s_.algo, workspace.get(),
s_.workspace_bytes, &beta, s_.y_tensor, y_data));

if (has_bias) {
const Tensor* B = dynamic_padding ? context->Input<Tensor>(3) : context->Input<Tensor>(2);
auto b_data = reinterpret_cast<const CudaT*>(B->Data<T>());
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(GetCudnnHandle(context), &alpha, s_.b_tensor, b_data, &alpha, s_.y_tensor, y_data));
CUDNN_RETURN_IF_ERROR(
cudnnAddTensor(GetCudnnHandle(context), &alpha, s_.b_tensor, b_data, &alpha, s_.y_tensor, y_data));
}
}

Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/cuda/nn/conv_transpose.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#pragma once

#include <memory>

#include "core/providers/cuda/cuda_common.h"
#include "core/providers/cuda/cuda_kernel.h"
#include "core/providers/cuda/cudnn_common.h"
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/cuda/nn/pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Pool : public CudaKernel, public PoolBase {
template <typename T, bool NHWC>
class Pool<T, MaxPool<8>, NHWC> final : public Pool<T, MaxPool<1>, NHWC> {
public:
Pool(const OpKernelInfo& info) : Pool<T, MaxPool<1>, NHWC>(info) {}
explicit Pool(const OpKernelInfo& info) : Pool<T, MaxPool<1>, NHWC>(info) {}

Status ComputeInternal(OpKernelContext* context) const override;
};
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/test/perftest/ort_test_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
}
auto pos = token.find("|");
if (pos == std::string::npos || pos == 0 || pos == token.length()) {
ORT_THROW("[ERROR] [CUDA] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
ORT_THROW(
"[ERROR] [CUDA] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
}

auto key = token.substr(0, pos);
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/test/providers/compare_provider_test_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ void CompareOpTester::CompareEPs(const std::shared_ptr<IExecutionProvider>& sour
// compare
ASSERT_TRUE(source_fetches.size() == target_fetches.size());
for (size_t i = 0; i < source_fetches.size(); i++) {
auto ret = CompareOrtValue(target_fetches[i], source_fetches[i], per_sample_tolerance, relative_per_sample_tolerance, false);
auto ret = CompareOrtValue(target_fetches[i], source_fetches[i], per_sample_tolerance,
relative_per_sample_tolerance, false);
EXPECT_EQ(ret.first, COMPARE_RESULT::SUCCESS) << ret.second;
}
}
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/test/providers/cuda/nhwc/nhwc_cuda_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
// Licensed under the MIT License.

#include <vector>
#include <utility>
#include <memory>

#include "core/providers/cuda/cuda_provider_options.h"
#include "core/providers/common.h"

Expand Down

0 comments on commit d0daf4e

Please sign in to comment.