From c7b213d8dd8b9d9042456203bd36306d76e502c0 Mon Sep 17 00:00:00 2001
From: root
Date: Mon, 4 Mar 2024 22:20:24 +0000
Subject: [PATCH 01/10] add split, pad, scatter opset18 tests

---
 .../orttraining/test/gradient/gradient_ops_test.cc | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
index feca94ae27c13..17ae9e4d5c15d 100644
--- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc
+++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
@@ -698,6 +698,13 @@ TEST(GradientCheckerTest, SplitGrad) {
   ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def_13, {shape}, {{3, 5}, {3, 5}, {3, 5}}, &max_error,
                                                          {MakeAttribute("axis", int64_t(0))}));
   EXPECT_IS_TINY(max_error);
+
+  // opset18 test
+  OpDef op_def_18{"Split", kOnnxDomain, 18};
+  ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def_18, {shape}, {{3, 5}, {3, 5}, {3, 5}}, &max_error,
+                                                         {MakeAttribute("axis", int64_t(0)),
+                                                         MakeAttribute("num_outputs", int64_t(3))}));
+  EXPECT_IS_TINY(max_error);
 }
 
 template <typename T>
@@ -2733,7 +2740,7 @@ TEST(GradientCheckerTest, TileGrad) {
 TEST(GradientCheckerTest, PadGrad) {
   float max_error;
   GradientChecker<float, float, float> gradient_checker;
-  OpDef op_def{"Pad", kOnnxDomain, 11};
+  OpDef op_def{"Pad", kOnnxDomain, 18};
 
   {
     TensorInfo x_info({2, 4}, true);
@@ -2803,7 +2810,7 @@
 TEST(GradientCheckerTest, ScatterNDGrad) {
   float max_error;
   GradientChecker<float, float, float> gradient_checker;
-  OpDef op_def{"ScatterND", kOnnxDomain, 11};
+  OpDef op_def{"ScatterND", kOnnxDomain, 18};
 
   {
     TensorInfo data_info({8}, true);
@@ -2887,7 +2894,7 @@ TEST(GradientCheckerTest, ScatterNDGrad) {
 TEST(GradientCheckerTest, ScatterElementsGrad) {
   float max_error;
   GradientChecker<float, float, float> gradient_checker;
-  OpDef op_def{"ScatterElements", kOnnxDomain, 13};
+  OpDef op_def{"ScatterElements", kOnnxDomain, 18};
 
   {  // without axis
     TensorInfo data_info({3, 3}, true);

From 152aa10e935c6535fc3edd5b68c356f9bb062804 Mon Sep 17 00:00:00 2001
From: root
Date: Mon, 4 Mar 2024 23:35:12 +0000
Subject: [PATCH 02/10] reducesum opset 18 test

---
 orttraining/orttraining/test/gradient/gradient_ops_test.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
index 17ae9e4d5c15d..91af4374d3794 100644
--- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc
+++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
@@ -619,6 +619,10 @@ TEST(GradientCheckerTest, ReduceSumGrad) {
   OpDef op_def_13{"ReduceSum", kOnnxDomain, 13};
 
   RunReductionTests(op_def_13, true, true);
+
+  OpDef op_def_18{"ReduceSum", kOnnxDomain, 18};
+
+  RunReductionTests(op_def_18, true, true);
 }
 
 TEST(GradientCheckerTest, ReduceL2Grad) {
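
[Editor's note — not part of the patch series] The thread running through these patches: starting with opset 18, the ONNX reduction ops exercised here (ReduceL2, ReduceMean, ReduceLogSumExp; ReduceSum made the same move already at opset 13) accept 'axes' as an optional second input instead of an attribute, and Split-18 requires either the optional 'split' input or the new 'num_outputs' attribute — hence the extra attribute in patch 01. The two boolean arguments passed to RunReductionTests are assumed here (the series itself does not show the signature) to switch the reduction tests into their axes-as-input variants. Below is a minimal standalone C++ sketch of the attribute-vs-input dispatch the later patches add to the gradient builders, using hypothetical stand-ins for the builder's NodeDef/ArgDef machinery:

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for the source node the gradient builder inspects.
struct SrcNode {
  std::map<std::string, std::vector<int64_t>> attributes;  // pre-18: {"axes": {...}}
  size_t num_inputs;  // 2 when 'axes' is wired up as the optional input I(1)
};

// Mirrors the branch structure the patches add to the Reduce* gradient builders.
void BuildUnsqueezeForGrad(const SrcNode& node, bool keepdims) {
  if (keepdims) {
    std::printf("keepdims=1: dY already has the reduced dims, use GO(0) as is\n");
    return;
  }
  if (node.attributes.count("axes") != 0) {
    std::printf("pre-opset-18 path: Unsqueeze(dY) with 'axes' attribute\n");
  } else if (node.num_inputs == 2) {
    std::printf("opset-18 path: Unsqueeze(dY, I(1)) with 'axes' as an input\n");
  } else {
    std::printf("full reduction, no axes anywhere: leave grad bound to GO(0)\n");
  }
}

int main() {
  BuildUnsqueezeForGrad({{{"axes", {0}}}, 1}, false);  // attribute path
  BuildUnsqueezeForGrad({{}, 2}, false);               // input path
  BuildUnsqueezeForGrad({{}, 1}, false);               // full reduction
}
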
From ed2edf06518bc7331739d8884dee76dd47d3fd6c Mon Sep 17 00:00:00 2001
From: root
Date: Tue, 5 Mar 2024 00:30:22 +0000
Subject: [PATCH 03/10] reducel2 opset18 test and gradient builder changes

---
 .../orttraining/core/graph/gradient_builder.cc     | 14 ++++++++++----
 .../orttraining/test/gradient/gradient_ops_test.cc |  5 +++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/orttraining/orttraining/core/graph/gradient_builder.cc b/orttraining/orttraining/core/graph/gradient_builder.cc
index e675b55c8af8f..a4fe7cc2b372a 100755
--- a/orttraining/orttraining/core/graph/gradient_builder.cc
+++ b/orttraining/orttraining/core/graph/gradient_builder.cc
@@ -1188,11 +1188,17 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceL2Gradient) {
   ArgDef scaled_dy_arg_def = IA("Masked_Scaled_dY");
   result.emplace_back(NodeDef("Where", {IA("Masked_Y"), ZERO, IA("Scaled_dY")}, {scaled_dy_arg_def}));
 
-  if (!keepdims && attributes.find("axes") != attributes.end()) {
-    std::vector<int64_t> axes_values = RetrieveValues<int64_t>(attributes.at("axes"));
+  if (!keepdims) {
+    size_t numInputs = GetSrcNodeInputSize();
     scaled_dy_arg_def = IA("Unsqueezed_Masked_Scaled_dY");
-    result.emplace_back(
-        NodeDef("Unsqueeze", {IA("Masked_Scaled_dY")}, {scaled_dy_arg_def}, {MakeAttribute("axes", axes_values)}));
+    if (attributes.find("axes") != attributes.end()) {
+      std::vector<int64_t> axes_values = RetrieveValues<int64_t>(attributes.at("axes"));
+      result.emplace_back(
+          NodeDef("Unsqueeze", {IA("Masked_Scaled_dY")}, {scaled_dy_arg_def}, {MakeAttribute("axes", axes_values)}));
+    } else if (numInputs == 2) {  // optional input 'axes' is available as input I(1)
+      result.emplace_back(
+          NodeDef("Unsqueeze", {IA("Masked_Scaled_dY"), I(1)}, {scaled_dy_arg_def}));
+    }
   }
 
   result.emplace_back(NodeDef("Mul", {I(0), scaled_dy_arg_def}, {GI(0)}));
diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
index 91af4374d3794..0b19609378a35 100644
--- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc
+++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
@@ -645,6 +645,11 @@ TEST(GradientCheckerTest, ReduceL2Grad) {
                                                            {MakeAttribute("axes", axes)}));
     EXPECT_IS_TINY(max_error);
   }
+
+  // axes is input from opset 18
+  OpDef op_def_18{"ReduceL2", kOnnxDomain, 18};
+
+  RunReductionTests(op_def_18, true, true);
 }
 
 TEST(GradientCheckerTest, ReduceLogSumExpGrad) {

From cfd65765205a16dd902dabb451ae372b2106ab45 Mon Sep 17 00:00:00 2001
From: root
Date: Tue, 5 Mar 2024 01:24:05 +0000
Subject: [PATCH 04/10] ReduceMean and ReduceLogSumExp test updates

---
 .../core/graph/gradient_builder.cc     | 21 ++++++++++++++++++-----
 .../test/gradient/gradient_ops_test.cc |  8 +++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/orttraining/orttraining/core/graph/gradient_builder.cc b/orttraining/orttraining/core/graph/gradient_builder.cc
index a4fe7cc2b372a..34e75e04e4d9f 100755
--- a/orttraining/orttraining/core/graph/gradient_builder.cc
+++ b/orttraining/orttraining/core/graph/gradient_builder.cc
@@ -1112,9 +1112,10 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceMeanGradient) {
 
   ArgDef grad = GO(0);
   if (!keepdims) {
+    size_t numInputs = GetSrcNodeInputSize();
+    grad = IA("Unqueezed_Grad");
     if (attributes.find("axes") != attributes.end()) {
       std::vector<int64_t> axes_values = RetrieveValues<int64_t>(attributes.at("axes"));
-      grad = IA("Unqueezed_Grad");
       if (SrcNodeOpsetVersion() < 13) {  // axes is attribute for unsqueeze
         result.push_back(NodeDef("Unsqueeze", {GO(0)}, {grad}, {MakeAttribute("axes", axes_values)}));
       } else {
@@ -1122,6 +1123,8 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceMeanGradient) {
         result.push_back(axes_values_node);
         result.push_back(NodeDef(OpDef{"Unsqueeze", kOnnxDomain, 13}, {GO(0), axes_values_node.output_args[0]}, {grad}));
       }
+    } else if (numInputs == 2) {  // optional input 'axes' is available as input I(1)
+      result.push_back(NodeDef("Unsqueeze", {GO(0), I(1)}, {grad}));
     }
   }
 
@@ -1152,12 +1155,20 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceLogSumExpGradient) {
   }
 
   ArgDef grad = GO(0);
-  if (!keepdims && attributes.find("axes") != attributes.end()) {
-    std::vector<int64_t> axes_values = RetrieveValues<int64_t>(attributes.at("axes"));
+  if (!keepdims) {
+    size_t numInputs = GetSrcNodeInputSize();
     grad = IA("Unsqueezed_Grad");
-    result.push_back(NodeDef("Unsqueeze", {GO(0)}, {grad}, {MakeAttribute("axes", axes_values)}));
+    if (attributes.find("axes") != attributes.end()) {
+      std::vector<int64_t> axes_values = RetrieveValues<int64_t>(attributes.at("axes"));
+
+      result.push_back(NodeDef("Unsqueeze", {GO(0)}, {grad}, {MakeAttribute("axes", axes_values)}));
 
-    result.push_back(NodeDef("Unsqueeze", {O(0)}, {IA("Unsqueezed_Output")}, {MakeAttribute("axes", axes_values)}));
+      result.push_back(NodeDef("Unsqueeze", {O(0)}, {IA("Unsqueezed_Output")}, {MakeAttribute("axes", axes_values)}));
+    } else if (numInputs == 2) {  // optional input 'axes' is available as input I(1)
+      result.push_back(NodeDef("Unsqueeze", {GO(0), I(1)}, {grad}));
+
+      result.push_back(NodeDef("Unsqueeze", {O(0), I(1)}, {IA("Unsqueezed_Output")}));
+    }
     result.push_back(NodeDef("Sub", {I(0), IA("Unsqueezed_Output")}, {IA("Self_Sub_Result")}));
   } else {
     result.push_back(NodeDef("Sub", {I(0), O(0)}, {IA("Self_Sub_Result")}));
diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
index 0b19609378a35..b2cde0590a945 100644
--- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc
+++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
@@ -607,6 +607,10 @@ TEST(GradientCheckerTest, ReduceMeanGrad) {
   OpDef op_def_opset13{"ReduceMean", kOnnxDomain, 13};
 
   RunReductionTests(op_def_opset13);
+
+  // axes is input from opset 18.
+  OpDef op_def_opset18{"ReduceMean", kOnnxDomain, 18};
+  RunReductionTests(op_def_opset18, true, true);
 }
 
 TEST(GradientCheckerTest, ReduceSumGrad) {
@@ -657,6 +661,10 @@ TEST(GradientCheckerTest, ReduceLogSumExpGrad) {
   OpDef op_def{"ReduceLogSumExp", kOnnxDomain, 11};
 
   RunReductionTests(op_def);
+
+  OpDef op_def_opset18{"ReduceLogSumExp", kOnnxDomain, 18};
+
+  RunReductionTests(op_def_opset18, true, true);
 }
 
 TEST(GradientCheckerTest, ReluGrad) {

From a3c8bb265ab575c117314461be33f8b5663aae64 Mon Sep 17 00:00:00 2001
From: root
Date: Tue, 5 Mar 2024 01:25:12 +0000
Subject: [PATCH 05/10] lint

---
 orttraining/orttraining/core/graph/gradient_builder.cc     | 2 +-
 orttraining/orttraining/test/gradient/gradient_ops_test.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/orttraining/orttraining/core/graph/gradient_builder.cc b/orttraining/orttraining/core/graph/gradient_builder.cc
index 34e75e04e4d9f..7e8490dfe41af 100755
--- a/orttraining/orttraining/core/graph/gradient_builder.cc
+++ b/orttraining/orttraining/core/graph/gradient_builder.cc
@@ -1160,7 +1160,7 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceLogSumExpGradient) {
     grad = IA("Unsqueezed_Grad");
     if (attributes.find("axes") != attributes.end()) {
       std::vector<int64_t> axes_values = RetrieveValues<int64_t>(attributes.at("axes"));
-      
+
       result.push_back(NodeDef("Unsqueeze", {GO(0)}, {grad}, {MakeAttribute("axes", axes_values)}));
 
       result.push_back(NodeDef("Unsqueeze", {O(0)}, {IA("Unsqueezed_Output")}, {MakeAttribute("axes", axes_values)}));
diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
index b2cde0590a945..94ca96c68f2ce 100644
--- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc
+++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
@@ -720,7 +720,7 @@ TEST(GradientCheckerTest, SplitGrad) {
   OpDef op_def_18{"Split", kOnnxDomain, 18};
   ASSERT_STATUS_OK(gradient_checker.ComputeGradientError(op_def_18, {shape}, {{3, 5}, {3, 5}, {3, 5}}, &max_error,
                                                          {MakeAttribute("axis", int64_t(0)),
-                                                         MakeAttribute("num_outputs", int64_t(3))}));
+                                                          MakeAttribute("num_outputs", int64_t(3))}));
   EXPECT_IS_TINY(max_error);
 }
From 7840801631cd01563c8bb8137375bf3206b75b33 Mon Sep 17 00:00:00 2001
From: root
Date: Wed, 13 Mar 2024 22:49:24 +0000
Subject: [PATCH 06/10] move IA declaration inside NodeDef

---
 orttraining/orttraining/core/graph/gradient_builder.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/orttraining/orttraining/core/graph/gradient_builder.cc b/orttraining/orttraining/core/graph/gradient_builder.cc
index 7e8490dfe41af..e610bb46a67bc 100755
--- a/orttraining/orttraining/core/graph/gradient_builder.cc
+++ b/orttraining/orttraining/core/graph/gradient_builder.cc
@@ -1113,18 +1113,17 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceMeanGradient) {
   ArgDef grad = GO(0);
   if (!keepdims) {
     size_t numInputs = GetSrcNodeInputSize();
-    grad = IA("Unqueezed_Grad");
     if (attributes.find("axes") != attributes.end()) {
       std::vector<int64_t> axes_values = RetrieveValues<int64_t>(attributes.at("axes"));
       if (SrcNodeOpsetVersion() < 13) {  // axes is attribute for unsqueeze
-        result.push_back(NodeDef("Unsqueeze", {GO(0)}, {grad}, {MakeAttribute("axes", axes_values)}));
+        result.push_back(NodeDef("Unsqueeze", {GO(0)}, {IA("Unqueezed_Grad")}, {MakeAttribute("axes", axes_values)}));
       } else {
         NodeDef axes_values_node = ConstantVectorNode(axes_values, Name("axes_values"));
         result.push_back(axes_values_node);
-        result.push_back(NodeDef(OpDef{"Unsqueeze", kOnnxDomain, 13}, {GO(0), axes_values_node.output_args[0]}, {grad}));
+        result.push_back(NodeDef(OpDef{"Unsqueeze", kOnnxDomain, 13}, {GO(0), axes_values_node.output_args[0]}, {IA("Unqueezed_Grad")}));
       }
     } else if (numInputs == 2) {  // optional input 'axes' is available as input I(1)
-      result.push_back(NodeDef("Unsqueeze", {GO(0), I(1)}, {grad}));
+      result.push_back(NodeDef("Unsqueeze", {GO(0), I(1)}, {IA("Unqueezed_Grad")}));
     }
   }
 
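
[Editor's note] Patch 06 inlines IA("Unqueezed_Grad") into the Unsqueeze NodeDefs but deletes the `grad = IA("Unqueezed_Grad");` rebinding, so the local `grad` stays bound to GO(0). The declaration `ArgDef grad = GO(0);` is kept, which suggests later NodeDefs in the builder consume `grad`; those consumers would then read the raw incoming gradient and bypass the Unsqueeze, leaving "Unqueezed_Grad" as an orphan tensor — presumably why patch 07 undoes this. A hypothetical standalone C++ sketch of the aliasing pitfall:

#include <cassert>
#include <string>

int main() {
  // `grad` names the tensor that downstream gradient nodes will consume.
  std::string grad = "GO(0)";

  // Patch 06's version: the Unsqueeze writes to "Unqueezed_Grad", but `grad`
  // is never rebound, so downstream consumers still read GO(0).
  std::string unsqueeze_output = "Unqueezed_Grad";
  assert(grad == "GO(0)");  // the Unsqueeze output is dead

  // Patch 07's (and the original) behavior: rebind so downstream NodeDefs
  // consume the unsqueezed gradient.
  grad = unsqueeze_output;
  assert(grad == "Unqueezed_Grad");
  return 0;
}
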
result.push_back(NodeDef("Unsqueeze", {GO(0), I(1)}, {IA("Unqueezed_Grad")})); + result.push_back(NodeDef("Unsqueeze", {GO(0), I(1)}, {grad})); } } From 58912eb12479a40cf4a77168a4cdb445d935d8b2 Mon Sep 17 00:00:00 2001 From: Prathik Date: Mon, 18 Mar 2024 22:23:11 +0000 Subject: [PATCH 08/10] missing ; --- orttraining/orttraining/core/graph/gradient_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orttraining/orttraining/core/graph/gradient_builder.cc b/orttraining/orttraining/core/graph/gradient_builder.cc index 7576e8814b826..7e8490dfe41af 100755 --- a/orttraining/orttraining/core/graph/gradient_builder.cc +++ b/orttraining/orttraining/core/graph/gradient_builder.cc @@ -1113,7 +1113,7 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceMeanGradient) { ArgDef grad = GO(0); if (!keepdims) { size_t numInputs = GetSrcNodeInputSize(); - grad = IA("Unqueezed_Grad") + grad = IA("Unqueezed_Grad"); if (attributes.find("axes") != attributes.end()) { std::vector axes_values = RetrieveValues(attributes.at("axes")); if (SrcNodeOpsetVersion() < 13) { // axes is attribute for unsqueeze From 15949a7a82b144771c84977fc9f42e3da757469e Mon Sep 17 00:00:00 2001 From: root Date: Mon, 18 Mar 2024 23:41:39 +0000 Subject: [PATCH 09/10] dont overwrite grad i default mode --- orttraining/orttraining/core/graph/gradient_builder.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/orttraining/orttraining/core/graph/gradient_builder.cc b/orttraining/orttraining/core/graph/gradient_builder.cc index 7e8490dfe41af..8bbbac62b02a2 100755 --- a/orttraining/orttraining/core/graph/gradient_builder.cc +++ b/orttraining/orttraining/core/graph/gradient_builder.cc @@ -1113,9 +1113,9 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceMeanGradient) { ArgDef grad = GO(0); if (!keepdims) { size_t numInputs = GetSrcNodeInputSize(); - grad = IA("Unqueezed_Grad"); if (attributes.find("axes") != attributes.end()) { std::vector axes_values = RetrieveValues(attributes.at("axes")); + grad = IA("Unqueezed_Grad"); if (SrcNodeOpsetVersion() < 13) { // axes is attribute for unsqueeze result.push_back(NodeDef("Unsqueeze", {GO(0)}, {grad}, {MakeAttribute("axes", axes_values)})); } else { @@ -1124,6 +1124,7 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceMeanGradient) { result.push_back(NodeDef(OpDef{"Unsqueeze", kOnnxDomain, 13}, {GO(0), axes_values_node.output_args[0]}, {grad})); } } else if (numInputs == 2) { // optional input 'axes' is available as input I(1) + grad = IA("Unqueezed_Grad"); result.push_back(NodeDef("Unsqueeze", {GO(0), I(1)}, {grad})); } } From 1f88d0070e5de5df952c115ce9174029ba182a81 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 18 Mar 2024 23:53:14 +0000 Subject: [PATCH 10/10] reflect same changes in GetReduceLogSumExpGradient --- orttraining/orttraining/core/graph/gradient_builder.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/orttraining/orttraining/core/graph/gradient_builder.cc b/orttraining/orttraining/core/graph/gradient_builder.cc index 8bbbac62b02a2..22dcf4eb92411 100755 --- a/orttraining/orttraining/core/graph/gradient_builder.cc +++ b/orttraining/orttraining/core/graph/gradient_builder.cc @@ -1158,14 +1158,15 @@ IMPLEMENT_GRADIENT_BUILDER(GetReduceLogSumExpGradient) { ArgDef grad = GO(0); if (!keepdims) { size_t numInputs = GetSrcNodeInputSize(); - grad = IA("Unsqueezed_Grad"); if (attributes.find("axes") != attributes.end()) { std::vector axes_values = RetrieveValues(attributes.at("axes")); + grad = IA("Unsqueezed_Grad"); result.push_back(NodeDef("Unsqueeze", {GO(0)}, {grad}, 
{MakeAttribute("axes", axes_values)})); result.push_back(NodeDef("Unsqueeze", {O(0)}, {IA("Unsqueezed_Output")}, {MakeAttribute("axes", axes_values)})); } else if (numInputs == 2) { // optional input 'axes' is available as input I(1) + grad = IA("Unsqueezed_Grad"); result.push_back(NodeDef("Unsqueeze", {GO(0), I(1)}, {grad})); result.push_back(NodeDef("Unsqueeze", {O(0), I(1)}, {IA("Unsqueezed_Output")}));