From d0daf4e63b73df685779afc7ce13978f9a599698 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20M=C3=BCller?=
Date: Thu, 12 Oct 2023 19:10:32 +0200
Subject: [PATCH] cpplint changes

---
 .../providers/cpu/nn/instance_norm_helper.h   |  12 +-
 .../core/providers/cuda/cuda_nhwc_kernels.cc  |   8 +-
 .../core/providers/cuda/cuda_nhwc_kernels.h   |  10 +-
 .../core/providers/cuda/cudnn_common.cc       |   4 +-
 onnxruntime/core/providers/cuda/nn/conv.cc    |   2 +
 onnxruntime/core/providers/cuda/nn/conv.h     |   4 +-
 .../core/providers/cuda/nn/conv_transpose.cc  | 112 ++++++------------
 .../core/providers/cuda/nn/conv_transpose.h   |   2 +
 onnxruntime/core/providers/cuda/nn/pool.h     |   2 +-
 onnxruntime/test/perftest/ort_test_session.cc |   3 +-
 .../providers/compare_provider_test_utils.cc  |   3 +-
 .../providers/cuda/nhwc/nhwc_cuda_helper.h    |   3 +
 12 files changed, 67 insertions(+), 98 deletions(-)

diff --git a/onnxruntime/core/providers/cpu/nn/instance_norm_helper.h b/onnxruntime/core/providers/cpu/nn/instance_norm_helper.h
index 6734ebf62f56d..7aa51001f3b29 100644
--- a/onnxruntime/core/providers/cpu/nn/instance_norm_helper.h
+++ b/onnxruntime/core/providers/cpu/nn/instance_norm_helper.h
@@ -9,12 +9,14 @@
 #include "core/framework/tensor.h"
 #endif
 #include <sstream>
+#include <string>
 
 namespace onnxruntime {
 
 class InstanceNormHelper {
  public:
-  static common::Status ValidateInputs(const Tensor* input, const Tensor* scale, const Tensor* B, bool is_nhwc = false) {
+  static common::Status ValidateInputs(const Tensor* input, const Tensor* scale, const Tensor* B,
+                                       bool is_nhwc = false) {
     const auto rank = input->Shape().NumDimensions();
     if (rank < 3) {
       std::ostringstream ostr;
@@ -31,8 +33,8 @@ class InstanceNormHelper {
 
     if (scale->Shape().Size() != in_channels) {
       std::ostringstream ostr;
-      ostr << "Mismatch between input data and scale: size of scale != input channel count "
-           << scale->Shape().Size() << " vs. " << in_channels << "nhwc: " << is_nhwc;
+      ostr << "Mismatch between input data and scale: size of scale != input channel count " << scale->Shape().Size()
+           << " vs. " << input->Shape().GetDims()[1];
       return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
     }
 
@@ -44,8 +46,8 @@ class InstanceNormHelper {
 
     if (B->Shape().Size() != in_channels) {
       std::ostringstream ostr;
-      ostr << "Mismatch between input data and B: size of B != input channel count "
-           << B->Shape().Size() << " vs. " << input->Shape().GetDims()[1];
+      ostr << "Mismatch between input data and B: size of B != input channel count " << B->Shape().Size() << " vs. "
+           << input->Shape().GetDims()[1];
       return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
     }
 
diff --git a/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc
index 15c3f3f579f76..f416caecd115f 100644
--- a/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc
+++ b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc
@@ -4,13 +4,14 @@
 
 #ifdef ENABLE_CUDA_NHWC_OPS
 
+#include <utility>
+
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/cuda/cuda_fwd.h"
 
 #include "core/providers/cuda/cuda_nhwc_kernels.h"
 
-namespace onnxruntime {
-namespace cuda {
+namespace onnxruntime::cuda {
 
 // When adding new supported NHWC operations make sure to also integrate them into: ConvertNodeLayout
 // in onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc
@@ -164,6 +165,5 @@ Status RegisterCudaNhwcKernels(KernelRegistry& kernel_registry) {
   }
   return Status::OK();
 }
-}  // namespace cuda
-}  // namespace onnxruntime
+}  // namespace onnxruntime::cuda
 #endif
diff --git a/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.h b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.h
index 00afb2f0554e3..0b3a6d5cff0c7 100644
--- a/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.h
+++ b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.h
@@ -6,12 +6,8 @@
 
 #include "core/common/status.h"
 
-using namespace onnxruntime::common;
+namespace onnxruntime::cuda {
 
-namespace onnxruntime {
-namespace cuda {
+onnxruntime::common::Status RegisterCudaNhwcKernels(onnxruntime::KernelRegistry& kernel_registry);
 
-Status RegisterCudaNhwcKernels(KernelRegistry& kernel_registry);
-
-}  // namespace cuda
-}  // namespace onnxruntime
+}  // namespace onnxruntime::cuda
diff --git a/onnxruntime/core/providers/cuda/cudnn_common.cc b/onnxruntime/core/providers/cuda/cudnn_common.cc
index 0b57ab559d04a..4df59a98b12e5 100644
--- a/onnxruntime/core/providers/cuda/cudnn_common.cc
+++ b/onnxruntime/core/providers/cuda/cudnn_common.cc
@@ -2,7 +2,9 @@
 // Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
-#include "cudnn_common.h"
+#include <utility>
+
+#include "core/providers/cuda/cudnn_common.h"
 #include "core/common/inlined_containers.h"
 #include "core/common/gsl.h"
 #include "shared_inc/cuda_call.h"
diff --git a/onnxruntime/core/providers/cuda/nn/conv.cc b/onnxruntime/core/providers/cuda/nn/conv.cc
index de8782f89f9e0..82f3503919237 100644
--- a/onnxruntime/core/providers/cuda/nn/conv.cc
+++ b/onnxruntime/core/providers/cuda/nn/conv.cc
@@ -2,6 +2,8 @@
 // Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
+#include <utility>
+
 #include "core/providers/cuda/nn/conv.h"
 #include "core/common/span_utils.h"
 #include "core/providers/cuda/cuda_common.h"
diff --git a/onnxruntime/core/providers/cuda/nn/conv.h b/onnxruntime/core/providers/cuda/nn/conv.h
index e103f2e74863d..bcaa4d855b81e 100644
--- a/onnxruntime/core/providers/cuda/nn/conv.h
+++ b/onnxruntime/core/providers/cuda/nn/conv.h
@@ -4,11 +4,13 @@
 
 #pragma once
 
+#include <list>
+#include <memory>
+
 #include "core/platform/ort_mutex.h"
 #include "core/providers/cuda/cuda_kernel.h"
 #include "core/providers/cuda/cudnn_common.h"
 #include "core/providers/cpu/nn/conv_attributes.h"
-#include <list>
 
 namespace onnxruntime {
 
diff --git a/onnxruntime/core/providers/cuda/nn/conv_transpose.cc b/onnxruntime/core/providers/cuda/nn/conv_transpose.cc
index a8831a09605b7..55dceaa2698e8 100644
--- a/onnxruntime/core/providers/cuda/nn/conv_transpose.cc
+++ b/onnxruntime/core/providers/cuda/nn/conv_transpose.cc
@@ -2,6 +2,8 @@
 // Copyright (c) 2023 NVIDIA Corporation.
 // Licensed under the MIT License.
 
+#include <utility>
+
 #include "conv_transpose.h"
 #include "core/providers/cuda/tensor/transpose.h"
 
@@ -19,23 +21,13 @@ namespace cuda {
 
 // Op Set 11 for ConvTranspose only update document to clarify default dilations and strides value.
 // which are already covered by op set 11 cpu version, so simply add declaration.
-#define REGISTER_KERNEL_TYPED(T, DOMAIN, NHWC)                                                \
-  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                    \
-      ConvTranspose,                                                                          \
-      DOMAIN,                                                                                 \
-      1, 10,                                                                                  \
-      T,                                                                                      \
-      kCudaExecutionProvider,                                                                 \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()),    \
-      ConvTranspose<T, NHWC>);                                                                \
-  ONNX_OPERATOR_TYPED_KERNEL_EX(                                                              \
-      ConvTranspose,                                                                          \
-      DOMAIN,                                                                                 \
-      11,                                                                                     \
-      T,                                                                                      \
-      kCudaExecutionProvider,                                                                 \
-      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()),    \
-      ConvTranspose<T, NHWC>);
+#define REGISTER_KERNEL_TYPED(T, DOMAIN, NHWC)                                                                      \
+  ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(                                                                          \
+      ConvTranspose, DOMAIN, 1, 10, T, kCudaExecutionProvider,                                                      \
+      (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), ConvTranspose<T, NHWC>); \
+  ONNX_OPERATOR_TYPED_KERNEL_EX(ConvTranspose, DOMAIN, 11, T, kCudaExecutionProvider,                               \
+                                (*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
+                                ConvTranspose<T, NHWC>);
 
 REGISTER_KERNEL_TYPED(float, kOnnxDomain, false)
 REGISTER_KERNEL_TYPED(double, kOnnxDomain, false)
@@ -52,8 +44,8 @@ Status ConvTranspose<T, NHWC>::ComputeInternal(OpKernelContext* context) const
 }
 
 template <typename T, bool NHWC>
-Status ConvTranspose<T, NHWC>::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
-                                       bool& is_packed, [[maybe_unused]] PrePackedWeights* prepacked_weights) {
+Status ConvTranspose<T, NHWC>::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc, bool& is_packed,
+                                       [[maybe_unused]] PrePackedWeights* prepacked_weights) {
   is_packed = false;
   // only layout of weight input is adjusted via PrePack
   if (NHWC) {  // InputTensors::IN_W
@@ -63,15 +55,10 @@ Status ConvTranspose<T, NHWC>::PrePack(const Tensor& tensor, int input_idx, All
     InlinedVector<size_t> perm{0, 2, 3, 1};
     gsl::span<size_t> permutation(perm.data(), 4);
 
-    TensorShapeVector new_dims{orig_shape[0],
-                               orig_shape[2],
-                               orig_shape[3],
-                               orig_shape[1]};
+    TensorShapeVector new_dims{orig_shape[0], orig_shape[2], orig_shape[3], orig_shape[1]};
     W_ = Tensor::Create(tensor.DataType(), TensorShape(new_dims), std::move(alloc));
 
-    auto status = cuda::Transpose::DoTranspose(GetDeviceProp(),
-                                               DefaultCudaStream(),
-                                               DefaultCublasHandle(),
+    auto status = cuda::Transpose::DoTranspose(GetDeviceProp(), DefaultCudaStream(), DefaultCublasHandle(),
                                                permutation,
                                                tensor, *W_);
     if (!status.IsOK()) {
@@ -126,8 +113,7 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
     bool input_dims_changed = (s_.last_x_dims.AsShapeVector() != x_dims);
     bool w_dims_changed = (s_.last_w_dims.AsShapeVector() != w_dims);
     if (input_dims_changed || w_dims_changed) {
-      if (input_dims_changed)
-        s_.last_x_dims = gsl::make_span(x_dims);
+      if (input_dims_changed) s_.last_x_dims = gsl::make_span(x_dims);
 
       if (w_dims_changed) {
         s_.last_w_dims = gsl::make_span(w_dims);
@@ -135,7 +121,8 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
       }
 
       ConvTransposeAttributes::Prepare p;
-      ORT_RETURN_IF_ERROR(conv_transpose_attrs_.PrepareForCompute(context, has_bias, p, dynamic_padding, &w_shape, NHWC));
+      ORT_RETURN_IF_ERROR(
+          conv_transpose_attrs_.PrepareForCompute(context, has_bias, p, dynamic_padding, &w_shape, NHWC));
 
       auto y_dims = p.Y->Shape().AsShapeVector();
       if (x_dimensions == 3) {
@@ -150,12 +137,9 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
 
       if (w_dims_changed) {
         if (NHWC) {
-          ORT_RETURN_IF_ERROR(s_.w_desc.Set(CUDNN_TENSOR_NHWC,
-                                            CudnnTensor::GetDataType<CudaT>(),
-                                            static_cast<int>(w_dims[0]),
-                                            static_cast<int>(w_dims[3]),
-                                            static_cast<int>(w_dims[1]),
-                                            static_cast<int>(w_dims[2])));
+          ORT_RETURN_IF_ERROR(s_.w_desc.Set(CUDNN_TENSOR_NHWC, CudnnTensor::GetDataType<CudaT>(),
+                                            static_cast<int>(w_dims[0]), static_cast<int>(w_dims[3]),
+                                            static_cast<int>(w_dims[1]), static_cast<int>(w_dims[2])));
         } else {
           ORT_RETURN_IF_ERROR(s_.w_desc.Set(w_dims, CudnnTensor::GetDataType<CudaT>()));
         }
@@ -170,15 +154,11 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
     }
     if (NHWC) {
       ORT_RETURN_IF_ERROR(s_.x_tensor.Set(CUDNN_TENSOR_NHWC, CudnnTensor::GetDataType<CudaT>(),
-                                          static_cast<int>(x_dims[0]),
-                                          static_cast<int>(x_dims[3]),
-                                          static_cast<int>(x_dims[1]),
-                                          static_cast<int>(x_dims[2])));
+                                          static_cast<int>(x_dims[0]), static_cast<int>(x_dims[3]),
+                                          static_cast<int>(x_dims[1]), static_cast<int>(x_dims[2])));
       ORT_RETURN_IF_ERROR(s_.y_tensor.Set(CUDNN_TENSOR_NHWC, CudnnTensor::GetDataType<CudaT>(),
-                                          static_cast<int>(y_dims[0]),
-                                          static_cast<int>(y_dims[3]),
-                                          static_cast<int>(y_dims[1]),
-                                          static_cast<int>(y_dims[2])));
+                                          static_cast<int>(y_dims[0]), static_cast<int>(y_dims[3]),
+                                          static_cast<int>(y_dims[1]), static_cast<int>(y_dims[2])));
     } else {
       ORT_RETURN_IF_ERROR(s_.x_tensor.Set(x_dims, CudnnTensor::GetDataType<CudaT>()));
       ORT_RETURN_IF_ERROR(s_.y_tensor.Set(y_dims, CudnnTensor::GetDataType<CudaT>()));
@@ -186,8 +166,8 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
 
     cudnnConvolutionMode_t mode = CUDNN_CROSS_CORRELATION;
     ORT_RETURN_IF_ERROR(s_.conv_desc.Set(p.kernel_shape.size(), p.pads, p.strides, p.dilations,
-                                         gsl::narrow_cast<int>(conv_transpose_attrs_.group),
-                                         mode, CudnnTensor::GetDataType<CudaT>()));
+                                         gsl::narrow_cast<int>(conv_transpose_attrs_.group), mode,
+                                         CudnnTensor::GetDataType<CudaT>()));
 
     if (has_bias) {
       const auto& b_shape = p.B->Shape();
@@ -195,8 +175,7 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
       TensorShapeVector b_dims(2 + p.kernel_shape.size());
       b_dims[0] = 1;                      // N
       b_dims[NHWC ? 3 : 1] = b_shape[0];  // C
-      for (size_t i = 0; i < p.kernel_shape.size(); i++)
-        b_dims[(NHWC ? 1 : 2) + i] = 1;
+      for (size_t i = 0; i < p.kernel_shape.size(); i++) b_dims[(NHWC ? 1 : 2) + i] = 1;
 
       ORT_RETURN_IF_ERROR(s_.b_tensor.Set(b_dims, CudnnTensor::GetDataType<CudaT>(), NHWC));
     }
@@ -204,7 +183,8 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
     y_data = reinterpret_cast<CudaT*>(p.Y->MutableData<T>());
 
    if (!s_.cached_benchmark_results.contains(x_dims)) {
-      IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
+      IAllocatorUniquePtr<void> algo_search_workspace =
+          GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
 
       // set math type to tensor core before algorithm search
       if constexpr (std::is_same<T, MLFloat16>::value)
@@ -213,19 +193,8 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
 
      cudnnConvolutionBwdDataAlgoPerf_t perf;
       int algo_count = 1;
       CUDNN_RETURN_IF_ERROR(cudnnFindConvolutionBackwardDataAlgorithmEx(
-          GetCudnnHandle(context),
-          s_.w_desc,
-          w_data,
-          s_.x_tensor,
-          x_data,
-          s_.conv_desc,
-          s_.y_tensor,
-          y_data,
-          1,
-          &algo_count,
-          &perf,
-          algo_search_workspace.get(),
-          AlgoSearchWorkspaceSize));
+          GetCudnnHandle(context), s_.w_desc, w_data, s_.x_tensor, x_data, s_.conv_desc, s_.y_tensor, y_data, 1,
+          &algo_count, &perf, algo_search_workspace.get(), AlgoSearchWorkspaceSize));
       s_.cached_benchmark_results.insert(x_dims, {perf.algo, perf.memory, perf.mathType});
     }
@@ -256,26 +225,15 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
 
     IAllocatorUniquePtr<void> workspace = GetScratchBuffer<void>(s_.workspace_bytes, context->GetComputeStream());
 
-    CUDNN_RETURN_IF_ERROR(
-        cudnnConvolutionBackwardData(
-            GetCudnnHandle(context),
-            &alpha,
-            s_.w_desc,
-            w_data,
-            s_.x_tensor,
-            x_data,
-            s_.conv_desc,
-            s_.algo,
-            workspace.get(),
-            s_.workspace_bytes,
-            &beta,
-            s_.y_tensor,
-            y_data));
+    CUDNN_RETURN_IF_ERROR(cudnnConvolutionBackwardData(GetCudnnHandle(context), &alpha, s_.w_desc, w_data, s_.x_tensor,
+                                                       x_data, s_.conv_desc, s_.algo, workspace.get(),
+                                                       s_.workspace_bytes, &beta, s_.y_tensor, y_data));
 
     if (has_bias) {
      const Tensor* B = dynamic_padding ? context->Input<Tensor>(3) : context->Input<Tensor>(2);
       auto b_data = reinterpret_cast<const CudaT*>(B->Data<T>());
-      CUDNN_RETURN_IF_ERROR(cudnnAddTensor(GetCudnnHandle(context), &alpha, s_.b_tensor, b_data, &alpha, s_.y_tensor, y_data));
+      CUDNN_RETURN_IF_ERROR(
+          cudnnAddTensor(GetCudnnHandle(context), &alpha, s_.b_tensor, b_data, &alpha, s_.y_tensor, y_data));
     }
   }
 
diff --git a/onnxruntime/core/providers/cuda/nn/conv_transpose.h b/onnxruntime/core/providers/cuda/nn/conv_transpose.h
index e553adffbdb2c..77c9d94162b6b 100644
--- a/onnxruntime/core/providers/cuda/nn/conv_transpose.h
+++ b/onnxruntime/core/providers/cuda/nn/conv_transpose.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include <memory>
+
 #include "core/providers/cuda/cuda_common.h"
 #include "core/providers/cuda/cuda_kernel.h"
 #include "core/providers/cuda/cudnn_common.h"
diff --git a/onnxruntime/core/providers/cuda/nn/pool.h b/onnxruntime/core/providers/cuda/nn/pool.h
index 3118ff4089d3a..8b5152a1565a9 100644
--- a/onnxruntime/core/providers/cuda/nn/pool.h
+++ b/onnxruntime/core/providers/cuda/nn/pool.h
@@ -22,7 +22,7 @@ class Pool : public CudaKernel, public PoolBase {
 template <typename T, bool NHWC>
 class Pool<T, MaxPool<8>, NHWC> final : public Pool<T, MaxPool<1>, NHWC> {
  public:
-  Pool(const OpKernelInfo& info) : Pool<T, MaxPool<1>, NHWC>(info) {}
+  explicit Pool(const OpKernelInfo& info) : Pool<T, MaxPool<1>, NHWC>(info) {}
   Status ComputeInternal(OpKernelContext* context) const override;
 };
 
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 4e5a904fea5b7..1323f21b2eda6 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -144,7 +144,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
       }
       auto pos = token.find("|");
       if (pos == std::string::npos || pos == 0 || pos == token.length()) {
-        ORT_THROW("[ERROR] [CUDA] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
+        ORT_THROW(
+            "[ERROR] [CUDA] Use a '|' to separate the key and value for the run-time option you are trying to use.\n");
       }
 
       auto key = token.substr(0, pos);
diff --git a/onnxruntime/test/providers/compare_provider_test_utils.cc b/onnxruntime/test/providers/compare_provider_test_utils.cc
index 79a34c39f68a0..3ef74259e27b6 100644
--- a/onnxruntime/test/providers/compare_provider_test_utils.cc
+++ b/onnxruntime/test/providers/compare_provider_test_utils.cc
@@ -193,7 +193,8 @@ void CompareOpTester::CompareEPs(const std::shared_ptr<IExecutionProvider>& sour
   // compare
   ASSERT_TRUE(source_fetches.size() == target_fetches.size());
   for (size_t i = 0; i < source_fetches.size(); i++) {
-    auto ret = CompareOrtValue(target_fetches[i], source_fetches[i], per_sample_tolerance, relative_per_sample_tolerance, false);
+    auto ret = CompareOrtValue(target_fetches[i], source_fetches[i], per_sample_tolerance,
+                               relative_per_sample_tolerance, false);
     EXPECT_EQ(ret.first, COMPARE_RESULT::SUCCESS) << ret.second;
   }
 }
diff --git a/onnxruntime/test/providers/cuda/nhwc/nhwc_cuda_helper.h b/onnxruntime/test/providers/cuda/nhwc/nhwc_cuda_helper.h
index 677bc1591b57f..2c942bb790096 100644
--- a/onnxruntime/test/providers/cuda/nhwc/nhwc_cuda_helper.h
+++ b/onnxruntime/test/providers/cuda/nhwc/nhwc_cuda_helper.h
@@ -3,6 +3,9 @@
 // Licensed under the MIT License.
 
 #include <vector>
+#include <memory>
+#include <string>
+
 #include "core/providers/cuda/cuda_provider_options.h"
 #include "core/providers/common.h"