From f74221acc3078c7b3fcbf5566f822ce553f43248 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 30 Jan 2024 00:12:50 +0000 Subject: [PATCH 1/4] upgrade to opset 17 --- .../python/training/ortmodule/__init__.py | 2 +- .../test/optimizer/compute_optimizer_test.cc | 8 ++++---- .../test/optimizer/graph_transform_test.cc | 6 +++--- .../test/optimizer/shape_optimizer_test.cc | 20 +++++++++---------- .../python/orttraining_test_ortmodule_api.py | 4 ++-- .../orttraining_test_ortmodule_onnx_ops.py | 2 +- ...orttraining-py-packaging-pipeline-cuda.yml | 2 +- ...ttraining-py-packaging-pipeline-cuda12.yml | 2 +- .../docker/Dockerfile.manylinux2_28_rocm | 2 +- ...Dockerfile.manylinux2_28_training_cuda11_8 | 2 +- ...Dockerfile.manylinux2_28_training_cuda12_2 | 2 +- .../pai/rocm-ci-pipeline-env.Dockerfile | 2 +- 12 files changed, 27 insertions(+), 27 deletions(-) diff --git a/orttraining/orttraining/python/training/ortmodule/__init__.py b/orttraining/orttraining/python/training/ortmodule/__init__.py index fbf1b7c2bac42..4a03465cf2ead 100644 --- a/orttraining/orttraining/python/training/ortmodule/__init__.py +++ b/orttraining/orttraining/python/training/ortmodule/__init__.py @@ -39,7 +39,7 @@ def _defined_from_envvar(name, default_value, warn=True): # NOTE: To *change* values in runtime, import onnxruntime.training.ortmodule and # assign them new values. Importing them directly do not propagate changes. ################################################################################ -ONNX_OPSET_VERSION = 15 +ONNX_OPSET_VERSION = 17 MINIMUM_RUNTIME_PYTORCH_VERSION_STR = "1.8.1" ORTMODULE_TORCH_CPP_DIR = os.path.join(os.path.dirname(__file__), "torch_cpp_extensions") _FALLBACK_INIT_EXCEPTION = None diff --git a/orttraining/orttraining/test/optimizer/compute_optimizer_test.cc b/orttraining/orttraining/test/optimizer/compute_optimizer_test.cc index cf510ea43c89f..509937bdd0c3a 100644 --- a/orttraining/orttraining/test/optimizer/compute_optimizer_test.cc +++ b/orttraining/orttraining/test/optimizer/compute_optimizer_test.cc @@ -135,7 +135,7 @@ TEST(ComputeOptimizerTests, InsertGatherBeforeSceLoss_Allowed) { } }; - std::vector opsets{12, 13, 14, 15}; + std::vector opsets{12, 13, 14, 15, 17}; for (auto opset : opsets) { std::unique_ptr transformer = std::make_unique(compatible_eps, std::vector{"label"}); @@ -206,7 +206,7 @@ TEST(ComputeOptimizerTests, InsertGatherBeforeSceLoss_NotAllowed_LabelNameNotMat } }; - std::vector opsets{12, 13, 14, 15}; + std::vector opsets{12, 13, 14, 15, 17}; for (auto opset : opsets) { std::unique_ptr transformer = std::make_unique(compatible_eps, std::vector{"label"}); @@ -277,7 +277,7 @@ TEST(ComputeOptimizerTests, InsertGatherBeforeSceLoss_NotAllowed_ReduceNone) { } }; - std::vector opsets{12, 13, 14, 15}; + std::vector opsets{12, 13, 14, 15, 17}; for (auto opset : opsets) { std::unique_ptr transformer = std::make_unique(compatible_eps, std::vector{"label"}); @@ -344,7 +344,7 @@ TEST(ComputeOptimizerTests, InsertGatherBeforeSceLoss_NotAllowed_NoIgnoreIndex) } }; - std::vector opsets{12, 13, 14, 15}; + std::vector opsets{12, 13, 14, 15, 17}; for (auto opset : opsets) { std::unique_ptr transformer = std::make_unique(compatible_eps, std::vector{"label"}); diff --git a/orttraining/orttraining/test/optimizer/graph_transform_test.cc b/orttraining/orttraining/test/optimizer/graph_transform_test.cc index b774fec11cc8d..bab7c09839273 100644 --- a/orttraining/orttraining/test/optimizer/graph_transform_test.cc +++ b/orttraining/orttraining/test/optimizer/graph_transform_test.cc @@ -1523,7 +1523,7 @@ TEST_F(GraphTransformationTests, ScaledSumFusionThreeInputs) { builder.AddNode("Identity", {add2_out}, {graph_out}); }; - const std::vector opsets{12, 13, 14, 15}; + const std::vector opsets{12, 13, 14, 15, 17}; for (auto& opset_version : opsets) { std::unique_ptr transformer = std::make_unique(); ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, opset_version, *logger_, std::move(transformer), @@ -1616,7 +1616,7 @@ TEST_F(GraphTransformationTests, ScaledSumFusionThreeInputs_LastAddNotHaveScaleI builder.AddNode("Identity", {add2_out}, {graph_out}); }; - const std::vector opsets{12, 13, 14, 15}; + const std::vector opsets{12, 13, 14, 15, 17}; for (auto& opset_version : opsets) { std::unique_ptr transformer = std::make_unique(); ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, opset_version, *logger_, std::move(transformer), @@ -1710,7 +1710,7 @@ TEST_F(GraphTransformationTests, ScaledSumFusionTwoInputs) { builder.AddNode("Identity", {add1_out}, {graph_output2}); }; - const std::vector opsets{12, 13, 14, 15}; + const std::vector opsets{12, 13, 14, 15, 17}; for (auto& opset_version : opsets) { std::unique_ptr transformer = std::make_unique(); ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, opset_version, *logger_, std::move(transformer), diff --git a/orttraining/orttraining/test/optimizer/shape_optimizer_test.cc b/orttraining/orttraining/test/optimizer/shape_optimizer_test.cc index ea05b29c8668b..a1629eb73eeb6 100644 --- a/orttraining/orttraining/test/optimizer/shape_optimizer_test.cc +++ b/orttraining/orttraining/test/optimizer/shape_optimizer_test.cc @@ -67,7 +67,7 @@ TEST(ShapeOptimizerTests, Shape15CannotFold) { return Status::OK(); }; - std::vector opset_candidates{15}; + std::vector opset_candidates{15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> identity_input_shape; @@ -145,7 +145,7 @@ TEST(ShapeOptimizerTests, Shape15) { return Status::OK(); }; - std::vector opset_candidates{15}; + std::vector opset_candidates{15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> identity_input_shape; @@ -218,7 +218,7 @@ TEST(ShapeOptimizerTests, Shape15TakesGraphInput) { return Status::OK(); }; - std::vector opset_candidates{15}; + std::vector opset_candidates{15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> shape_input_shape; @@ -289,7 +289,7 @@ TEST(ShapeOptimizerTests, Shape15GeneratesGraphOutput) { return Status::OK(); }; - std::vector opset_candidates{15}; + std::vector opset_candidates{15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> identity_input_shape; @@ -366,7 +366,7 @@ TEST(ShapeOptimizerTests, Slice) { return Status::OK(); }; - std::vector opset_candidates{10, 11, 12, 13, 14, 15}; + std::vector opset_candidates{10, 11, 12, 13, 14, 15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> shape_input_shape; @@ -446,7 +446,7 @@ TEST(ShapeOptimizerTests, SliceGeneratesGraphOutput) { return Status::OK(); }; - std::vector opset_candidates{10, 11, 12, 13, 14, 15}; + std::vector opset_candidates{10, 11, 12, 13, 14, 15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> shape_input_shape; @@ -530,7 +530,7 @@ TEST(ShapeOptimizerTests, Gather) { return Status::OK(); }; - std::vector opset_candidates{10, 11, 12, 13, 14, 15}; + std::vector opset_candidates{10, 11, 12, 13, 14, 15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> shape_input_shape; @@ -639,7 +639,7 @@ TEST(ShapeOptimizerTests, ConcreteDimUsedBySlice) { return Status::OK(); }; - std::vector opset_candidates{10, 11, 12, 13, 14, 15}; + std::vector opset_candidates{10, 11, 12, 13, 14, 15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> dropout_input_shape; @@ -810,7 +810,7 @@ TEST(ShapeOptimizerTests, ConcreteDimUsedByGatherSlice) { return Status::OK(); }; - std::vector opset_candidates{10, 11, 12, 13, 14, 15}; + std::vector opset_candidates{10, 11, 12, 13, 14, 15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> reshape_input_shape; @@ -976,7 +976,7 @@ TEST(ShapeOptimizerTests, SymbolicDimUsedByGather_ConcreteDimUsedByGather) { return Status::OK(); }; - std::vector opset_candidates{10, 11, 12, 13, 14, 15}; + std::vector opset_candidates{10, 11, 12, 13, 14, 15, 17}; for (auto opset : opset_candidates) { auto build_test_case = [&](ModelTestBuilder& builder) { std::vector> reshape_input_shape; diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py index 938d33cc9a714..257eedf34b9a3 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py @@ -34,7 +34,7 @@ from onnxruntime.training.ortmodule._custom_gradient_registry import register_gradient from onnxruntime.training.ortmodule.options import _SkipCheck -DEFAULT_OPSET = 15 +DEFAULT_OPSET = 17 # PyTorch model definitions for tests @@ -5252,7 +5252,7 @@ def run_step(model, x): assert ort_model._torch_module._execution_manager(True)._runtime_options.onnx_opset_version == 13 -@pytest.mark.parametrize("opset_version", [12, 13, 14, 15]) +@pytest.mark.parametrize("opset_version", [12, 13, 14, 15, 17]) def test_opset_version_change(opset_version): original_env = None if "ORTMODULE_ONNX_OPSET_VERSION" in os.environ: diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py index 4f0925c5c855b..2fcf8bf0f26c0 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py @@ -79,7 +79,7 @@ def run_step(model, x): for onnx_model in [onnx_graph_inf, onnx_graph_train]: for oimp in onnx_model.opset_import: if oimp.domain == "": - self.assertEqual(oimp.version, 15) + self.assertEqual(oimp.version, 17) # Needs to match latest default ORTModule opset if op_grad_type is not None: if isinstance(op_grad_type, tuple): text = str(onnx_graph_train) diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml index f244851f8cc37..6b99955a02177 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml @@ -13,7 +13,7 @@ stages: parameters: build_py_parameters: --enable_training --update --build torch_version: '2.0.0' - opset_version: '15' + opset_version: '17' cuda_version: '11.8' cmake_cuda_architectures: 60;61;70;75;80;86;90 docker_file: Dockerfile.manylinux2_28_training_cuda11_8 diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml index 422fb33eec5de..86dce7ae465fc 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml @@ -13,7 +13,7 @@ stages: parameters: build_py_parameters: --enable_training --update --build torch_version: '2.1.0' - opset_version: '15' + opset_version: '17' cuda_version: '12.2' cmake_cuda_architectures: 70;75;80;86;90 docker_file: Dockerfile.manylinux2_28_training_cuda12_2 diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm index dd7c669c37885..e1914d5fe2f06 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm @@ -178,7 +178,7 @@ CMD ["/bin/bash"] #Build manylinux2014 docker image end ARG PYTHON_VERSION=3.8 -ARG OPSET_VERSION=15 +ARG OPSET_VERSION=17 ARG INSTALL_DEPS_EXTRA_ARGS diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda11_8 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda11_8 index a6a75afb0f4c3..fed29689fbe5e 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda11_8 +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda11_8 @@ -161,7 +161,7 @@ CMD ["/bin/bash"] #Build manylinux2014 docker image end ARG PYTHON_VERSION=3.9 ARG TORCH_VERSION=2.0.0 -ARG OPSET_VERSION=15 +ARG OPSET_VERSION=17 ARG INSTALL_DEPS_EXTRA_ARGS #Add our own dependencies diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda12_2 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda12_2 index d29157daef611..e1caa141ef317 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda12_2 +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_training_cuda12_2 @@ -161,7 +161,7 @@ CMD ["/bin/bash"] #Build manylinux2014 docker image end ARG PYTHON_VERSION=3.9 ARG TORCH_VERSION=2.1.0 -ARG OPSET_VERSION=15 +ARG OPSET_VERSION=17 ARG INSTALL_DEPS_EXTRA_ARGS #Add our own dependencies diff --git a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile index 4db9df80ed187..ac2b80b321eaa 100644 --- a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile +++ b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile @@ -130,7 +130,7 @@ RUN pip install \ # Install migraphx RUN apt update && apt install -y migraphx -ENV ORTMODULE_ONNX_OPSET_VERSION=15 +ENV ORTMODULE_ONNX_OPSET_VERSION=17 ARG BUILD_UID=1001 ARG BUILD_USER=onnxruntimedev From 4419d11c85c1ff89c6ae179176d150cf8f5e86d6 Mon Sep 17 00:00:00 2001 From: Prathik Rao Date: Tue, 13 Feb 2024 00:17:48 +0000 Subject: [PATCH 2/4] override default layer_norm export num outputs --- .../ortmodule/_custom_op_symbolic_registry.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py index 99e8851b6a697..5cfcde00fdada 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py @@ -808,3 +808,37 @@ def upsample_nearest2d(g, input, output_size, scale_factors): @register_symbolic("upsample_nearest3d") def upsample_nearest3d(g, input, output_size, scale_factors): return _upsample_nearest(g, input, output_size, scale_factors, "upsample_nearest3d") + +@register_symbolic("layer_norm") +@parse_args("v", "is", "v", "v", "f", "none") +def layer_norm(g, input, normalized_shape, weight, bias, eps, cudnn_enable): + # normalized_shape: input shape from an expected input of size + # axis: The first normalization dimension. + # layer_norm normalizes on the last D dimensions, + # where D is the size of normalized_shape + axis = -len(normalized_shape) + scalar_type = _type_utils.JitScalarType.from_value( + input, _type_utils.JitScalarType.FLOAT + ) + dtype = scalar_type.dtype() + if symbolic_helper._is_none(weight): + weight_value = torch.ones(normalized_shape, dtype=dtype) + weight = g.op("Constant", value_t=weight_value) + if symbolic_helper._is_none(bias): + bias_value = torch.zeros(normalized_shape, dtype=dtype) + bias = g.op("Constant", value_t=bias_value) + + out = g.op( + "LayerNormalization", + input, + weight, + bias, + epsilon_f=eps, + axis_i=axis, + outputs=3, # force all 3 outputs to be exported in training mode + operator_s="layer_norm", + overload_name_s="vec", + ) + + res, new_running_mean, new_running_var = out + return res, new_running_mean, new_running_var From 21054b2f7ab6e4ed478490247ed7c589d27d4068 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 13 Feb 2024 03:57:17 +0000 Subject: [PATCH 3/4] layernorm fix --- .../ortmodule/_custom_op_symbolic_registry.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py index 5cfcde00fdada..559f4b0b9a31b 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py @@ -817,18 +817,8 @@ def layer_norm(g, input, normalized_shape, weight, bias, eps, cudnn_enable): # layer_norm normalizes on the last D dimensions, # where D is the size of normalized_shape axis = -len(normalized_shape) - scalar_type = _type_utils.JitScalarType.from_value( - input, _type_utils.JitScalarType.FLOAT - ) - dtype = scalar_type.dtype() - if symbolic_helper._is_none(weight): - weight_value = torch.ones(normalized_shape, dtype=dtype) - weight = g.op("Constant", value_t=weight_value) - if symbolic_helper._is_none(bias): - bias_value = torch.zeros(normalized_shape, dtype=dtype) - bias = g.op("Constant", value_t=bias_value) - - out = g.op( + + res, new_running_mean, new_running_var = g.op( "LayerNormalization", input, weight, @@ -840,5 +830,4 @@ def layer_norm(g, input, normalized_shape, weight, bias, eps, cudnn_enable): overload_name_s="vec", ) - res, new_running_mean, new_running_var = out - return res, new_running_mean, new_running_var + return res From cd33bf40c440f5ac5dcfd93e722a85e46b8e3d12 Mon Sep 17 00:00:00 2001 From: Prathik Rao Date: Tue, 13 Feb 2024 07:55:49 +0000 Subject: [PATCH 4/4] linting --- .../training/ortmodule/_custom_op_symbolic_registry.py | 5 ++--- .../test/python/orttraining_test_ortmodule_onnx_ops.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py index 5657a95d02a02..f81aef5f6b9c4 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_op_symbolic_registry.py @@ -822,7 +822,7 @@ def upsample_bicubic2d(g, input, output_size, align_corners, scale_factors): overload_name_s="vec", ) - + @register_symbolic("layer_norm") @parse_args("v", "is", "v", "v", "f", "none") def layer_norm(g, input, normalized_shape, weight, bias, eps, cudnn_enable): @@ -839,10 +839,9 @@ def layer_norm(g, input, normalized_shape, weight, bias, eps, cudnn_enable): bias, epsilon_f=eps, axis_i=axis, - outputs=3, # force all 3 outputs to be exported in training mode + outputs=3, # force all 3 outputs to be exported in training mode operator_s="layer_norm", overload_name_s="vec", ) return res - \ No newline at end of file diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py index 2fcf8bf0f26c0..2f240406b25b9 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py @@ -79,7 +79,7 @@ def run_step(model, x): for onnx_model in [onnx_graph_inf, onnx_graph_train]: for oimp in onnx_model.opset_import: if oimp.domain == "": - self.assertEqual(oimp.version, 17) # Needs to match latest default ORTModule opset + self.assertEqual(oimp.version, 17) # Needs to match latest default ORTModule opset if op_grad_type is not None: if isinstance(op_grad_type, tuple): text = str(onnx_graph_train)