Skip to content

Commit

Permalink
Update ck
Browse files Browse the repository at this point in the history
  • Loading branch information
cloudhan committed Nov 17, 2023
1 parent 05526b3 commit b4ffef4
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 77 deletions.
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@ tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.1.0.zip;757f90a795034a89d4f48a79d1f009f7a04c8dee
utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240cd09efde15191e144bc7c7d38.zip;9925739c9debc0efa2adcb194d371a35b6a03156
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/a4f72a314a85732ed67d5aa8d1088d207a7e0e61.zip;f57357ab6d300e207a632d034ebc8aa036a090d9
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/5356c4a943a35e74d7cdc69486afcb8703b9a59a.zip;522382c2af437e09124287e5879ab64af5b2e299
17 changes: 14 additions & 3 deletions cmake/patches/composable_kernel/Fix_Clang_Build.patch
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b09da41a8..fca2bdf69 100644
index 04674124c..12e8b8b00 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,7 +19,7 @@ endif()
Expand Down Expand Up @@ -48,7 +48,18 @@ index b09da41a8..fca2bdf69 100644

## tidy
include(EnableCompilerWarnings)
@@ -489,11 +466,3 @@ rocm_install(FILES
@@ -376,7 +353,9 @@ if(BUILD_DEV)
add_compile_options(-Werror -Weverything)
endif()
#add flags to reduce the size of binaries
-add_compile_options(-Oz -flto=thin)
+# -flto requires ORT to use a linker that supports LTO, and the -flto flag should be passed to the linker as well.
+# add_compile_options(-Oz -flto=thin)
+add_compile_options(-Oz)
message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")

add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
@@ -482,11 +461,3 @@ rocm_install(FILES

set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
Expand All @@ -61,7 +72,7 @@ index b09da41a8..fca2bdf69 100644
- HEADER_ONLY
-)
diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
index a0478c9f0..1e7782cd4 100644
index 9cb5d0e9a..141a46f3d 100644
--- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
@@ -44,8 +44,14 @@ function(add_instance_library INSTANCE_NAME)
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ auto GetCKGroupNormNHWCTypeStringAndOps() {
gamma_beta_strides, // gammaStrides
gamma_beta_strides, // betaStrides
in_out_strides, // yStrides
{0, 0}, // saveMeanStrides
{0, 0}, // saveInvStdStrides
reduce_dims, // reduceDims
params->epsilon,
params->src,
Expand Down
124 changes: 63 additions & 61 deletions onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

#ifdef USE_COMPOSABLE_KERNEL
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_normalization.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp"
#include "ck/tensor_operation/gpu/device/device_normalization_fwd.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_normalization_fwd_impl.hpp"
#include "ck/utility/data_type.hpp"

namespace onnxruntime {
Expand All @@ -21,58 +21,60 @@ using F32 = float;
using Swish = ck::tensor_operation::element_wise::Swish;
using Pass = ck::tensor_operation::element_wise::PassThrough;

using ck::tensor_operation::device::DeviceNormalization; // the interface
using ck::tensor_operation::device::DeviceNormalizationImpl; // the implementation
using ck::tensor_operation::device::DeviceNormalizationFwd; // the interface
using ck::tensor_operation::device::DeviceNormalizationFwdImpl; // the implementation

// See https://github.com/ROCmSoftwarePlatform/composable_kernel/blob/1fefd82ed8/library/src/tensor_operation_instance/gpu/normalization_fwd/normalization_fwd_instance_common.hpp

template <typename OutElementwise, ck::index_t Rank, ck::index_t Reduce>
using device_normalization_f32_instances = std::tuple<
// clang-format off
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, OutElementwise, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorSize, BetaSrcVectorSize, YDstVectorSize>
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 2, 1, 2, 1, 2, 1, 2, 2>, // irregular size
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 16, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 32, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 16, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 2, 16, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 32, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 2, 8, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 8, 1, 4, 1, 4, 1, 4, 4>
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, SaveMeanInvStdDataType, OutElementwise, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize, SaveMeanInvStdScalarPerVector>

Check warning on line 32 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L32

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:32:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size

Check warning on line 33 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L33

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:33:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size

Check warning on line 34 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L34

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:34:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size

Check warning on line 35 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L35

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:35:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size

Check warning on line 36 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L36

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:36:  Lines should be <= 120 characters long  [whitespace/line_length] [2]

Check warning on line 36 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L36

At least two spaces is best between code and comments [whitespace/comments] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:36:  At least two spaces is best between code and comments  [whitespace/comments] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1>, // irregular size

Check warning on line 37 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L37

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:37:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 38 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L38

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:38:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 8, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 39 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L39

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:39:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 16, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 40 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L40

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:40:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 32, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 41 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L41

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:41:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 42 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L42

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:42:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 8, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 43 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L43

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:43:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 16, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 44 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L44

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:44:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 2, 16, 1, 4, 1, 4, 1, 4, 4, 2>,

Check warning on line 45 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L45

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:45:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 32, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 46 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L46

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:46:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 47 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L47

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:47:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 8, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 48 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L48

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:48:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 2, 8, 1, 4, 1, 4, 1, 4, 4, 2>,

Check warning on line 49 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L49

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:49:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>,

Check warning on line 50 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L50

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:50:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F32, F32, F32, F32, F32, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 8, 1, 4, 1, 4, 1, 4, 4, 1>

Check warning on line 51 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L51

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:51:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
// clang-format on
>;

template <typename OutElementwise, ck::index_t Rank, ck::index_t Reduce>
using device_normalization_f16_instances = std::tuple<
using device_normalization_f16_instances =
// clang-format off
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, OutElementwise, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorSize, BetaSrcVectorSize, YDstVectorSize>
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 2, 1, 2, 1, 2, 1, 2, 2>, // irregular size
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 16, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 32, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 16, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 2, 16, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 32, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 8, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 512, 1, 512, 2, 8, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 4, 1, 4, 1, 4, 1, 4, 4>,
DeviceNormalizationImpl<F16, F32, F32, F32, F16, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 8, 1, 4, 1, 4, 1, 4, 4>
std::tuple <

Check warning on line 58 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L58

Add #include <tuple> for tuple<> [build/include_what_you_use] [4]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:58:  Add #include <tuple> for tuple<>  [build/include_what_you_use] [4]
// XDataType, GammaDataType, BetaDataType, ComputeDataType, YDataType, SaveMeanInvStdDataType, OutElementwise, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XYSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorSize, SaveMeanInvStdScalarPerVector>

Check warning on line 59 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L59

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:59:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size

Check warning on line 60 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L60

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:60:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size

Check warning on line 61 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L61

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:61:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size

Check warning on line 62 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L62

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:62:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>, // irregular size

Check warning on line 63 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L63

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:63:  Lines should be <= 120 characters long  [whitespace/line_length] [2]

Check warning on line 63 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L63

At least two spaces is best between code and comments [whitespace/comments] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:63:  At least two spaces is best between code and comments  [whitespace/comments] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1>, // irregular size

Check warning on line 64 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L64

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:64:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 4, 1, 4, 1, 4, 1, 4, 4, 1>, // irregular size

Check warning on line 65 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L65

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:65:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 64, 1, 64, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 66 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L66

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:66:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 67 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L67

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:67:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 16, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 68 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L68

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:68:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 128, 1, 128, 1, 32, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 69 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L69

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:69:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 70 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L70

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:70:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 16, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 71 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L71

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:71:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 2, 16, 1, 8, 1, 8, 1, 8, 8, 2>,

Check warning on line 72 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L72

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:72:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 256, 1, 256, 1, 32, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 73 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L73

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:73:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 74 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L74

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:74:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 512, 1, 512, 1, 16, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 75 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L75

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:75:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 8, 1, 8, 1, 8, 1, 8, 8, 1>,

Check warning on line 76 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L76

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:76:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
DeviceNormalizationFwdImpl<F16, F32, F32, F32, F16, F32, OutElementwise, Rank, Reduce, 1024, 1, 1024, 1, 16, 1, 8, 1, 8, 1, 8, 8, 1>

Check warning on line 77 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L77

Lines should be <= 120 characters long [whitespace/line_length] [2]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:77:  Lines should be <= 120 characters long  [whitespace/line_length] [2]
// clang-format on
>;

Expand All @@ -85,38 +87,38 @@ template <typename InDataType,
typename YElementwiseOperation,
ck::index_t Rank,
ck::index_t NumReduceDim>
std::vector<std::unique_ptr<DeviceNormalization<InDataType,
GammaDataType,
BetaDataType,
AccDataType,
OutDataType,
YElementwiseOperation,
Rank,
NumReduceDim>>>
std::vector<std::unique_ptr<DeviceNormalizationFwd<InDataType,
GammaDataType,
BetaDataType,
AccDataType,
OutDataType,
YElementwiseOperation,
Rank,
NumReduceDim>>>
GetDeviceGroupNormInstances() {
return {};
}

template <>
std::vector<std::unique_ptr<DeviceNormalization<
F16, F32, F32, F32, F16, Swish, 5, 3>>>
std::vector<std::unique_ptr<DeviceNormalizationFwd<
F16, F32, F32, F16, F32, Swish, 5, 3>>>
GetDeviceGroupNormInstances<
F16, F32, F32, F32, F16, Swish, 5, 3>();
F16, F32, F32, F16, F32, Swish, 5, 3>();

template <>
std::vector<std::unique_ptr<DeviceNormalization<
F16, F32, F32, F32, F16, Pass, 5, 3>>>
std::vector<std::unique_ptr<DeviceNormalizationFwd<
F16, F32, F32, F16, F32, Pass, 5, 3>>>
GetDeviceGroupNormInstances<
F16, F32, F32, F32, F16, Pass, 5, 3>();
F16, F32, F32, F16, F32, Pass, 5, 3>();

template <>
std::vector<std::unique_ptr<DeviceNormalization<
std::vector<std::unique_ptr<DeviceNormalizationFwd<
F32, F32, F32, F32, F32, Swish, 5, 3>>>
GetDeviceGroupNormInstances<
F32, F32, F32, F32, F32, Swish, 5, 3>();

template <>
std::vector<std::unique_ptr<DeviceNormalization<
std::vector<std::unique_ptr<DeviceNormalizationFwd<

Check warning on line 121 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L121

Add #include <memory> for unique_ptr<> [build/include_what_you_use] [4]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:121:  Add #include <memory> for unique_ptr<>  [build/include_what_you_use] [4]

Check warning on line 121 in onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh#L121

Add #include <vector> for vector<> [build/include_what_you_use] [4]
Raw output
onnxruntime/contrib_ops/rocm/diffusion/group_norm_ck_impl/impl.cuh:121:  Add #include <vector> for vector<>  [build/include_what_you_use] [4]
F32, F32, F32, F32, F32, Pass, 5, 3>>>
GetDeviceGroupNormInstances<
F32, F32, F32, F32, F32, Pass, 5, 3>();
Expand Down
Loading

0 comments on commit b4ffef4

Please sign in to comment.