diff --git a/onnxruntime/core/optimizer/constant_folding.cc b/onnxruntime/core/optimizer/constant_folding.cc index eb130785add1c..f46273f2680a9 100644 --- a/onnxruntime/core/optimizer/constant_folding.cc +++ b/onnxruntime/core/optimizer/constant_folding.cc @@ -185,19 +185,24 @@ Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level, fetch_mlvalue_idxs.push_back(info.GetMLValueIndex(node_out->Name())); } - auto& ep_type = node->GetExecutionProviderType(); - const bool node_on_cpu_ep = ep_type == kCpuExecutionProvider; + const bool node_on_cpu_ep = node->GetExecutionProviderType() == kCpuExecutionProvider; + + std::unique_ptr kernel; - // override the EP assigned to the node so that it will use the CPU kernel for Compute. if (!node_on_cpu_ep) { + // We need to copy the string here instead of taking a reference to it since node->SetExecutionProviderType + // will change the value of the reference + auto ep_type = node->GetExecutionProviderType(); + + // override the EP assigned to the node so that it will use the CPU kernel for Compute. node->SetExecutionProviderType(kCpuExecutionProvider); - } - auto kernel = info.CreateKernel(node); + kernel = info.CreateKernel(node); - // undo the EP change to the value that was assigned at graph partitioning time - if (!node_on_cpu_ep) { + // undo the EP change to the value that was assigned at graph partitioning time node->SetExecutionProviderType(ep_type); + } else { + kernel = info.CreateKernel(node); } // We currently constant fold using the CPU EP only. diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index 8e1511bcaafeb..553fcca92aa78 100755 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -613,6 +613,36 @@ TEST_F(GraphTransformationTests, ConstantFoldingNodesOnDifferentEP) { } } +TEST_F(GraphTransformationTests, ConstantFoldingUnsupportedFloat16) { + constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "constant_float16_mul.onnx"; + std::shared_ptr model; + ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); + Graph& graph = model->MainGraph(); + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Mul"] == 1); + std::unique_ptr e = + std::make_unique(CPUExecutionProviderInfo()); + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + ASSERT_STATUS_OK(graph_transformation_mgr.Register( + std::make_unique(*e.get(), false /*skip_dequantize_linear*/), TransformerLevel::Level1)); + + // assign all nodes to CUDA. the constant folding should try folding the node on the CPU and fail, thus leaving the + // EP as CUDA and not constant folding the node. + for (auto& node : graph.Nodes()) { + node.SetExecutionProviderType(kCudaExecutionProvider); + } + + ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_)); + + op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Mul"] == 1); + + // all nodes should still be on CUDA + for (auto& node : graph.Nodes()) { + EXPECT_STREQ(node.GetExecutionProviderType().c_str(), kCudaExecutionProvider); + } +} + TEST_F(GraphTransformationTests, ConstantFoldingSubgraph) { TensorProto value_tensor; value_tensor.add_dims(1); diff --git a/onnxruntime/test/testdata/transform/constant_float16_mul.onnx b/onnxruntime/test/testdata/transform/constant_float16_mul.onnx new file mode 100644 index 0000000000000..baa682edb7f56 --- /dev/null +++ b/onnxruntime/test/testdata/transform/constant_float16_mul.onnx @@ -0,0 +1,17 @@ +onnxruntime_test:° +2c1c1_node"Constant* +value*  +*€xBc1v  +3c2c2_node"Constant* +value* +*€€Bc2v  + +c1 +c2 +mul_outputmul"Mul float16_mulb + +mul_output + + + +B \ No newline at end of file