diff --git a/onnxruntime/test/python/onnxruntime_test_python_trt_acc.py b/onnxruntime/test/python/onnxruntime_test_python_trt_acc.py
index 9cf3a1523756e..9683317b37733 100644
--- a/onnxruntime/test/python/onnxruntime_test_python_trt_acc.py
+++ b/onnxruntime/test/python/onnxruntime_test_python_trt_acc.py
@@ -1,8 +1,8 @@
 """
 This test compares output of below huggingface models
-- "microsoft/resnet-50"
-- "microsoft/Phi-3.5-mini-instruct"
-on Pytorch cpu vs [ORT CPU EP, ORT TensorRT EP] with different configuations (fp16, no ort graph optimization, 1 layer transformer vs full model)
+- microsoft/resnet-18 and microsoft/resnet-50
+- microsoft/Phi-3.5-mini-instruct with 1 layer transformer vs full model
+on Pytorch cpu vs [ORT CPU EP, ORT TensorRT EP] with different configurations [fp16, no ort graph optimization].
 """
 from transformers import AutoImageProcessor, ResNetForImageClassification
 from transformers import AutoModel, AutoTokenizer
@@ -21,29 +21,21 @@ def run_model_in_pytorch(model, inputs):
     output = model(**inputs).logits
     return output
 
-def run_model_in_ort(model_file, inputs, ep, disable_ort_graph_optimization=False):
-    if disable_ort_graph_optimization:
+def run_model_in_ort(model_file, inputs, ep, use_graph_opt=True):
+    if use_graph_opt:
+        sess_opt = None
+    else:
         sess_opt = ort.SessionOptions()
         sess_opt.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
-    else:
-        sess_opt = None
     session = ort.InferenceSession(model_file, providers=ep, sess_opt=sess_opt)
-    # model_inputs = session.get_inputs()
-    # input_data = np.array(input_tensor)
-    # outputs = session.run(None, {model_inputs[0].name: input_data})
     outputs = session.run(None, inputs)
     output = np.array(outputs[0])
     return output
 
 
 def get_model_and_inputs(model_name, use_minimal_model=True):
-    if model_name == "microsoft/resnet-50":
+    if model_name == "microsoft/resnet-50" or model_name == "microsoft/resnet-18":
         model = ResNetForImageClassification.from_pretrained(model_name)
-        # if use_minimal_model:
-            # model.config.num_channels = 1
-            # model.config.embedding_size = 1
-            # model.config.hidden_sizes = [1, 2]
-            # model.config.depths = [1, 2]
         input_tensor = torch.randn(1, 3, 224, 224)
         pytorch_inputs = {'pixel_values': input_tensor}
         # inputs key value need to match forward()
@@ -56,8 +48,6 @@ def get_model_and_inputs(model_name, use_minimal_model=True):
            model.model.layers = model.model.layers[:1]
            # Update the configuration to reflect the reduced number of layers
            model.config.num_hidden_layers = 1 # default 32
-            # input_tensor = torch.randint(0, model.config.vocab_size, (1, 30)) # Batch size 1, sequence length 30
-            # inputs = {'input_ids': random_input_ids}
         else:
             print(f"Using full model for {model_name}")
             # model.model.layers = model.model.layers[:4]
@@ -96,110 +86,39 @@ def fix_phi35_model(onnx_model_filename):
     # Iterate through nodes to find the node by name
     for node in graph.node:
         if node.name == "/model/layers.0/mlp/Slice_1":
-            # print(f"Found node: {node.name}")
-            # print(node) # Print the details of the node
-            # print(node.input)
-            node.input[1] = "/model/layers.0/mlp/Constant_6_output_0" # starts
-            node.input[2] = "/model/layers.0/mlp/Constant_7_output_0" # ends
+            node.input[1] = "/model/layers.0/mlp/Constant_6_output_0" # starts attribute
+            node.input[2] = "/model/layers.0/mlp/Constant_7_output_0" # ends attribute
         if node.name == "/model/layers.0/mlp/Slice":
-            # print(f"Found node: {node.name}")
-            # print(node) # Print the details of the node
-            # print(node.input)
-            node.input[2] = "/model/layers.0/mlp/Constant_6_output_0" # ends
+            node.input[2] = "/model/layers.0/mlp/Constant_6_output_0" # ends attribute
         if node.name == "/Slice":
-            # print(f"Found node: {node.name}")
-            # print(node) # Print the details of the node
-            # print(node.input)
-            node.input[1] = "/Constant41_output_0"
-            # return
-        # if node.name == "/model/layers.0/mlp/Mul_output_0":
-            # print(f"Found node: {node.name}")
-            # print(node) # Print the details of the node
-            # # return
-        # if node.name == "/model/layers.0/mlp/Constant_1_output_0":
-            # print(f"Found node: {node.name}")
-            # print(node) # Print the details of the node
-        # if node.name == "/model/layers.0/mlp/Mul_1":
-            # print(node)
-        # if node.name == "/model/layers.0/mlp/Constant_1":
-            # print(node)
-
-    # for initializer in graph.initializer:
-        # print(f"Name: {initializer.name}")
-        # tensor_value = onnx.numpy_helper.to_array(initializer)
-        # print(f"Value: {tensor_value}")
-        # print(tensor_value)
-        # if initializer.name == "/model/layers.0/mlp/Mul_output_0":
-            # print(f"Tensor '{initializer.name}' found in initializers.")
-            # tensor_value = numpy_helper.to_array(initializer)
-            # print(f"Value: {tensor_value}")
-            # print(tensor_value)
-            # # return tensor_value
-        # if initializer.name == "/model/layers.0/mlp/Constant_1_output_0":
-            # print(f"Tensor '{initializer.name}' found in initializers.")
-            # tensor_value = numpy_helper.to_array(initializer)
-            # print(f"Value: {tensor_value}")
-            # print(node)
-
-    # for node in graph.output:
-        # print(node)
-        # if node.name == "/model/layers.0/mlp/Mul_output_0":
-            # print(f"Tensor '{node.name}' found (op_type: {node.op_type}) .")
-            # print(node)
-            # # return node
-        # if node.name == "/model/layers.0/mlp/Constant_1_output_0":
-            # print(f"Tensor '{node.name}' found (op_type: {node.op_type}) .")
-            # print(node)
-
-    # for node in graph.node:
-        # if node.op_type == "Constant":
-            # print(node)
+            node.input[1] = "/Constant41_output_0" # ends attribute
 
-    # print(f"Node '{node_name}' not found in the model.")
-    # data = np.array([8192], dtype=np.int64)
-    # # raw_bytes = data.tobytes()
-    # # # raw_bytes = struct('