diff --git a/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias.cc b/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias.cc
index 0284e6f2aaf42..92ba808dd85c2 100644
--- a/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/relative_attn_bias.cc
@@ -66,7 +66,7 @@ Status RelPosAttnBias::ComputeInternal(OpKernelContext* context) const {
   const int64_t key_len = *key_length->Data();
 
   if (query_len != key_len) {
-    ORT_THROW("Relatvie position bias currently only support query length equal to key length in Self Attention.");
+    ORT_THROW("Relative position bias currently only supports query length equal to key length in self attention.");
   }
 
   Tensor* output = context->Output(0, {1, num_heads, query_len, key_len});
diff --git a/onnxruntime/python/tools/transformers/__init__.py b/onnxruntime/python/tools/transformers/__init__.py
index edfb82b253b81..90c03fa9b9a24 100644
--- a/onnxruntime/python/tools/transformers/__init__.py
+++ b/onnxruntime/python/tools/transformers/__init__.py
@@ -2,15 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-
 import os
 import sys
-sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2"))
-
-import convert_to_onnx  # noqa: E402, F401
-
-# added for backward compatible
-import gpt2_helper  # noqa: E402, F401
-
-sys.path.append(os.path.join(os.path.dirname(__file__), "models", "t5"))
+sys.path.append(os.path.dirname(__file__))
 
diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py
index bd9a649ae74fd..97330295e17ed 100644
--- a/onnxruntime/python/tools/transformers/benchmark.py
+++ b/onnxruntime/python/tools/transformers/benchmark.py
@@ -45,12 +45,9 @@
 import os
 import timeit
 from datetime import datetime
-from enum import Enum  # noqa: F401
 
 import numpy
-import onnx  # noqa: F401
 import psutil
-from benchmark_helper import allocateOutputBuffers  # noqa: F401
 from benchmark_helper import (
     ConfigModifier,
     OptimizerInfo,
@@ -65,6 +62,7 @@
     setup_logger,
 )
 from fusion_options import FusionOptions
+from huggingface_models import MODEL_CLASSES, MODELS
 from onnx_exporter import (
     create_onnxruntime_input,
     export_onnx_model_from_pt,
@@ -76,8 +74,6 @@
 
 logger = logging.getLogger("")
 
-from huggingface_models import MODEL_CLASSES, MODELS  # noqa: E402
-
 cpu_count = psutil.cpu_count(logical=False)
 
 # Set OMP environment variable before importing onnxruntime or torch.
@@ -85,7 +81,7 @@
 os.environ["OMP_NUM_THREADS"] = str(cpu_count)
 
 import torch  # noqa: E402
-from transformers import AutoConfig, AutoModel, AutoTokenizer, GPT2Model, LxmertConfig  # noqa: E402, F401
+from transformers import AutoConfig, AutoTokenizer, LxmertConfig  # noqa: E402
 
 
 def run_onnxruntime(
diff --git a/onnxruntime/python/tools/transformers/bert_perf_test.py b/onnxruntime/python/tools/transformers/bert_perf_test.py
index 984814f2f5cbb..ddf152ba13964 100644
--- a/onnxruntime/python/tools/transformers/bert_perf_test.py
+++ b/onnxruntime/python/tools/transformers/bert_perf_test.py
@@ -3,11 +3,11 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
-# This tool measures the inference performance of onnxruntime or onnxruntime-gpu python package on Bert model.
-
-# The input model shall have exactly three inputs. The model is either fully optimized (with EmbedLayerNormalization node),
-# or with reasonable input names (one input name has 'mask' substring, another has 'token' or 'segment' substring).
-# See get_bert_inputs function in bert_test_data.py for more information.
+# This tool measures the inference performance of onnxruntime on a BERT-like model with inputs such as input_ids,
+# token_type_ids (optional), and attention_mask (optional).
+#
+# If the model does not have exactly the three inputs above, you might need to specify the input names with
+# --input_ids_name, --segment_ids_name and --input_mask_name.
 
 # Example command to run test on batch_size 1 and 2 for a model on GPU:
 # python bert_perf_test.py --model bert.onnx --batch_size 1 2 --sequence_length 128 --use_gpu --samples 1000 --test_times 1
@@ -270,7 +270,7 @@ def run_one_test(model_setting, test_setting, perf_results, all_inputs, intra_op
         results, latency_list = onnxruntime_inference(session, all_inputs, output_names)
         all_latency_list.extend(latency_list)
 
-    # latency in miliseconds
+    # latency in milliseconds
     latency_ms = np.array(all_latency_list) * 1000
 
     average_latency = statistics.mean(latency_ms)
diff --git a/onnxruntime/python/tools/transformers/bert_test_data.py b/onnxruntime/python/tools/transformers/bert_test_data.py
index 6a7139d0e8085..84ecae1907cd3 100644
--- a/onnxruntime/python/tools/transformers/bert_test_data.py
+++ b/onnxruntime/python/tools/transformers/bert_test_data.py
@@ -584,7 +584,12 @@ def create_and_save_test_data(
 
     import onnxruntime
 
-    session = onnxruntime.InferenceSession(model)
+    providers = (
+        ["CUDAExecutionProvider", "CPUExecutionProvider"]
+        if "CUDAExecutionProvider" in onnxruntime.get_available_providers()
+        else ["CPUExecutionProvider"]
+    )
+    session = onnxruntime.InferenceSession(model, providers=providers)
     output_names = [output.name for output in session.get_outputs()]
 
     for i, inputs in enumerate(all_inputs):
@@ -629,6 +634,7 @@ def main():
         args.only_input_tensors,
         args.average_sequence_length,
         args.random_sequence_length,
+        args.mask_type,
     )
 
     print("Test data is saved to directory:", output_dir)
diff --git a/onnxruntime/python/tools/transformers/convert_generation.py b/onnxruntime/python/tools/transformers/convert_generation.py
index 90ae35a9311e5..73561d312e4d4 100644
--- a/onnxruntime/python/tools/transformers/convert_generation.py
+++ b/onnxruntime/python/tools/transformers/convert_generation.py
@@ -45,7 +45,6 @@
 import logging
 import math
 import os
-import sys
 import time
 from enum import Enum
 from pathlib import Path
@@ -54,9 +53,14 @@
 import numpy as np
 import onnx
 import torch
-from benchmark_helper import Precision
+from benchmark_helper import Precision, setup_logger
 from fusion_utils import NumpyHelper
+from models.gpt2.convert_to_onnx import main as convert_gpt2_to_onnx
+from models.gpt2.gpt2_helper import PRETRAINED_GPT2_MODELS
+from models.t5.convert_to_onnx import export_onnx_models as export_t5_onnx_models
+from models.t5.t5_helper import PRETRAINED_MT5_MODELS, PRETRAINED_T5_MODELS
 from onnx import GraphProto, ModelProto, TensorProto
+from onnx_model import OnnxModel
 from transformers import (
     GPT2Config,
     GPT2LMHeadModel,
@@ -70,16 +74,6 @@
 
 from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_available_providers
 
-sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2"))
-from gpt2_helper import PRETRAINED_GPT2_MODELS  # noqa: E402
-from models.gpt2.convert_to_onnx import main as convert_gpt2_to_onnx  # noqa: E402
-
-sys.path.append(os.path.join(os.path.dirname(__file__), "models", "t5")) -from benchmark_helper import setup_logger # noqa: E402 -from models.t5.convert_to_onnx import export_onnx_models as export_t5_onnx_models # noqa: E402 -from models.t5.t5_helper import PRETRAINED_MT5_MODELS, PRETRAINED_T5_MODELS # noqa: E402 -from onnx_model import OnnxModel # noqa: E402 - logger = logging.getLogger("") diff --git a/onnxruntime/python/tools/transformers/fusion_gelu_approximation.py b/onnxruntime/python/tools/transformers/fusion_gelu_approximation.py index 085723ce75c61..5337a10824442 100644 --- a/onnxruntime/python/tools/transformers/fusion_gelu_approximation.py +++ b/onnxruntime/python/tools/transformers/fusion_gelu_approximation.py @@ -3,8 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from logging import getLogger # noqa: F401 - from fusion_base import Fusion from onnx import helper from onnx_model import OnnxModel diff --git a/onnxruntime/python/tools/transformers/fusion_gpt_attention.py b/onnxruntime/python/tools/transformers/fusion_gpt_attention.py index 308112a18c3d2..7b9e758178e2d 100644 --- a/onnxruntime/python/tools/transformers/fusion_gpt_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_gpt_attention.py @@ -7,7 +7,7 @@ import numpy as np from fusion_base import Fusion from fusion_utils import FusionUtils -from onnx import TensorProto, helper, numpy_helper # noqa: F401 +from onnx import helper from onnx_model import OnnxModel logger = getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/fusion_gpt_attention_megatron.py b/onnxruntime/python/tools/transformers/fusion_gpt_attention_megatron.py index 4d5aac78efb76..052dd243fd788 100644 --- a/onnxruntime/python/tools/transformers/fusion_gpt_attention_megatron.py +++ b/onnxruntime/python/tools/transformers/fusion_gpt_attention_megatron.py @@ -5,10 +5,8 @@ from logging import getLogger import numpy as np -from fusion_base import Fusion # noqa: F401 from fusion_gpt_attention import FusionGptAttentionPastBase -from fusion_utils import FusionUtils # noqa: F401 -from onnx import TensorProto, helper, numpy_helper # noqa: F401 +from onnx import helper from onnx_model import OnnxModel logger = getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/fusion_gpt_attention_no_past.py b/onnxruntime/python/tools/transformers/fusion_gpt_attention_no_past.py index 3a83de5f40bba..83fa51dcfafa6 100644 --- a/onnxruntime/python/tools/transformers/fusion_gpt_attention_no_past.py +++ b/onnxruntime/python/tools/transformers/fusion_gpt_attention_no_past.py @@ -4,10 +4,8 @@ # -------------------------------------------------------------------------- from logging import getLogger -import numpy as np # noqa: F401 from fusion_base import Fusion -from fusion_utils import FusionUtils # noqa: F401 -from onnx import TensorProto, helper, numpy_helper # noqa: F401 +from onnx import helper from onnx_model import OnnxModel logger = getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/machine_info.py b/onnxruntime/python/tools/transformers/machine_info.py index 6d10b855d7e7d..288e36facb708 100644 --- a/onnxruntime/python/tools/transformers/machine_info.py +++ b/onnxruntime/python/tools/transformers/machine_info.py @@ -9,9 +9,8 @@ import json import logging import platform -import sys # noqa: F401 from os import environ -from typing import Dict, List, Tuple, Union # noqa: F401 +from typing import Dict, List import cpuinfo import psutil diff --git 
a/onnxruntime/python/tools/transformers/models/bart/__init__.py b/onnxruntime/python/tools/transformers/models/bart/__init__.py new file mode 100644 index 0000000000000..815be385d7dd4 --- /dev/null +++ b/onnxruntime/python/tools/transformers/models/bart/__init__.py @@ -0,0 +1,12 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +import os.path +import sys + +sys.path.append(os.path.dirname(__file__)) + +transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..")) +if transformers_dir not in sys.path: + sys.path.append(transformers_dir) diff --git a/onnxruntime/python/tools/transformers/models/bert/__init__.py b/onnxruntime/python/tools/transformers/models/bert/__init__.py index cc667396a2622..815be385d7dd4 100644 --- a/onnxruntime/python/tools/transformers/models/bert/__init__.py +++ b/onnxruntime/python/tools/transformers/models/bert/__init__.py @@ -2,3 +2,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- +import os.path +import sys + +sys.path.append(os.path.dirname(__file__)) + +transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..")) +if transformers_dir not in sys.path: + sys.path.append(transformers_dir) diff --git a/onnxruntime/python/tools/transformers/models/gpt2/__init__.py b/onnxruntime/python/tools/transformers/models/gpt2/__init__.py index cc667396a2622..815be385d7dd4 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/__init__.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/__init__.py @@ -2,3 +2,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +import os.path +import sys + +sys.path.append(os.path.dirname(__file__)) + +transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..")) +if transformers_dir not in sys.path: + sys.path.append(transformers_dir) diff --git a/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py b/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py index e8553e2cae0f7..e48f0adc832c5 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py @@ -10,25 +10,22 @@ import csv import logging import os -import sys from datetime import datetime import psutil import torch -from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper -from packaging import version -from transformers import AutoConfig - -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) - -from benchmark_helper import ( # noqa: E402 +from benchmark_helper import ( Precision, create_onnxruntime_session, get_ort_environment_variables, prepare_environment, setup_logger, ) -from quantize_helper import QuantizeHelper # noqa: E402 +from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper +from packaging import version +from quantize_helper import QuantizeHelper +from transformers import AutoConfig +from transformers import __version__ as transformers_version logger = logging.getLogger("") @@ -169,8 +166,6 @@ def parse_arguments(argv=None): def main(args): - from transformers import __version__ as transformers_version - if version.parse(transformers_version) < version.parse( "3.1.0" ): # past_key_values name does not exist in 3.0.2 or older diff --git a/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py index 2042a58153e25..27e3899c11b7a 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py @@ -15,29 +15,31 @@ """ import argparse +import csv import json import logging import os +import shutil import sys from pathlib import Path import numpy import torch -from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper -from gpt2_tester import Gpt2Tester -from packaging import version -from transformers import AutoConfig - -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) - -from benchmark_helper import ( # noqa: E402 +from benchmark_helper import ( Precision, create_onnxruntime_session, get_ort_environment_variables, prepare_environment, setup_logger, ) -from quantize_helper import QuantizeHelper # noqa: E402 +from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper +from gpt2_tester import Gpt2Tester +from packaging import version +from quantize_helper import QuantizeHelper +from transformers import AutoConfig +from transformers import __version__ as transformers_version + +from onnxruntime import __version__ as ort_version logger = logging.getLogger("") @@ -242,8 +244,6 @@ def get_latency_name(batch_size, sequence_length, past_sequence_length): def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename: str = "gpt2_parity_results.csv"): result = {} - from transformers import __version__ as transformers_version - if version.parse(transformers_version) < 
version.parse( "3.1.0" ): # past_key_values name does not exist in 3.0.2 or older @@ -253,8 +253,6 @@ def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename: setup_logger(args.verbose) if not experiment_name: - import sys - experiment_name = " ".join(argv if argv else sys.argv[1:]) if args.tolerance == 0: @@ -366,8 +364,6 @@ def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename: output_path = onnx_model_paths["int8"] if args.output.endswith(".onnx") and output_path != args.output and not args.use_external_data_format: - import shutil - shutil.move(output_path, args.output) output_path = args.output @@ -424,10 +420,6 @@ def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename: logger.info(f"fp16 conversion parameters:{fp16_params}") # Write results to file - import csv - - from onnxruntime import __version__ as ort_version - latency_name = get_latency_name(batch_size, sequence_length, past_sequence_length) csv_file_existed = os.path.exists(csv_filename) with open(csv_filename, mode="a", newline="") as csv_file: diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py index b10f5ba763678..e01585ae84163 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py @@ -9,7 +9,6 @@ import pickle import random import shutil -import sys import tempfile import time from pathlib import Path @@ -18,16 +17,15 @@ import numpy import onnx import torch +from benchmark_helper import Precision +from float16 import float_to_float16_max_diff +from fusion_options import FusionOptions +from io_binding_helper import IOBindingHelper +from onnx_model import OnnxModel +from optimizer import optimize_model +from torch_onnx_export_helper import torch_onnx_export from transformers import GPT2Config, GPT2LMHeadModel, GPT2Model, TFGPT2Model -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) - -from benchmark_helper import Precision # noqa: E402 -from float16 import float_to_float16_max_diff # noqa: E402 -from io_binding_helper import IOBindingHelper # noqa: E402 -from onnx_model import OnnxModel # noqa: E402 -from torch_onnx_export_helper import torch_onnx_export # noqa: E402 - logger = logging.getLogger(__name__) PRETRAINED_GPT2_MODELS = ["distilgpt2", "gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl"] @@ -517,9 +515,6 @@ def optimize_onnx( **kwargs, ): """Optimize ONNX model with an option to convert it to use mixed precision.""" - from fusion_options import FusionOptions - from optimizer import optimize_model - optimization_options = FusionOptions("gpt2") m = optimize_model( diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py index 905c56ff93523..a1e6d3125e7fb 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py @@ -16,18 +16,14 @@ import json import logging import os -import sys import onnx import scipy.stats +from benchmark_helper import get_ort_environment_variables, setup_logger from convert_to_onnx import main from gpt2_helper import PRETRAINED_GPT2_MODELS, Gpt2Helper from onnx_model import OnnxModel -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) - -from benchmark_helper import get_ort_environment_variables, setup_logger # noqa: E402 - logger = 
logging.getLogger("") diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_tester.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_tester.py index 80fbbf8b380f3..12700f00ad0c2 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_tester.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_tester.py @@ -8,17 +8,13 @@ import math import os import statistics -import sys import timeit import numpy import torch +from benchmark_helper import Precision from gpt2_helper import Gpt2Helper, Gpt2Inputs -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) - -from benchmark_helper import Precision # noqa: E402 - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py index 2e0e0b1cb6ff2..5adf801d59ebe 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py @@ -9,18 +9,14 @@ import math import multiprocessing import os -import sys from pathlib import Path import numpy import torch +from benchmark_helper import create_onnxruntime_session from gpt2_helper import Gpt2Helper from onnx import TensorProto, numpy_helper -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) - -from benchmark_helper import create_onnxruntime_session # noqa: E402 - NON_ZERO_VALUE = str(1) ZERO_VALUE = str(0) diff --git a/onnxruntime/python/tools/transformers/models/longformer/__init__.py b/onnxruntime/python/tools/transformers/models/longformer/__init__.py index cc667396a2622..815be385d7dd4 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/__init__.py +++ b/onnxruntime/python/tools/transformers/models/longformer/__init__.py @@ -2,3 +2,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +import os.path +import sys + +sys.path.append(os.path.dirname(__file__)) + +transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..")) +if transformers_dir not in sys.path: + sys.path.append(transformers_dir) diff --git a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py index bf6c1e60308be..c9a679c4eac8a 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py +++ b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py @@ -43,6 +43,7 @@ from datetime import datetime from typing import Any, Dict, List +import benchmark_helper import numpy as np import torch from longformer_helper import PRETRAINED_LONGFORMER_MODELS, LongformerHelper, LongformerInputs @@ -50,9 +51,6 @@ import onnxruntime -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -import benchmark_helper # noqa: E402 - logger = logging.getLogger("") diff --git a/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py index da6ed0bae9440..b80feec892994 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py @@ -33,23 +33,19 @@ import argparse import inspect -import os -import sys from pathlib import Path import torch import transformers from longformer_helper import PRETRAINED_LONGFORMER_MODELS from onnx import load_model +from onnx_model_bert import BertOnnxModel from packaging import version from torch.onnx import register_custom_op_symbolic from torch.onnx.symbolic_helper import parse_args +from torch_onnx_export_helper import torch_onnx_export from transformers import LongformerModel, LongformerSelfAttention -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from onnx_model_bert import BertOnnxModel # noqa: E402 -from torch_onnx_export_helper import torch_onnx_export # noqa: E402 - # Supports format 0 or 1 weight_bias_format = 0 diff --git a/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py b/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py index 6ba4fac1b7551..8fb04bf3405c8 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py +++ b/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py @@ -8,15 +8,12 @@ import argparse import os import random -import sys from pathlib import Path import numpy as np -from onnx import ModelProto, TensorProto, numpy_helper # noqa: F401 - -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from bert_test_data import fake_input_ids_data, fake_input_mask_data, output_test_data # noqa: E402 -from onnx_model import OnnxModel # noqa: E402 +from bert_test_data import fake_input_ids_data, fake_input_mask_data, output_test_data +from onnx import ModelProto, TensorProto +from onnx_model import OnnxModel def parse_arguments(): diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/__init__.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/__init__.py index cc667396a2622..815be385d7dd4 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/__init__.py +++ 
b/onnxruntime/python/tools/transformers/models/stable_diffusion/__init__.py @@ -2,3 +2,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- +import os.path +import sys + +sys.path.append(os.path.dirname(__file__)) + +transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..")) +if transformers_dir not in sys.path: + sys.path.append(transformers_dir) diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py index f1b60ba1d27fa..a8e3c69332339 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py @@ -23,24 +23,21 @@ import logging import os import shutil -import sys import tempfile from pathlib import Path from typing import List import coloredlogs import onnx +from fusion_options import FusionOptions +from onnx_model_clip import ClipOnnxModel +from onnx_model_unet import UnetOnnxModel +from onnx_model_vae import VaeOnnxModel +from optimizer import optimize_by_onnxruntime, optimize_model from packaging import version import onnxruntime -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from fusion_options import FusionOptions # noqa: E402 -from onnx_model_clip import ClipOnnxModel # noqa: E402 -from onnx_model_unet import UnetOnnxModel # noqa: E402 -from onnx_model_vae import VaeOnnxModel # noqa: E402 -from optimizer import optimize_by_onnxruntime, optimize_model # noqa: E402 - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/models/t5/__init__.py b/onnxruntime/python/tools/transformers/models/t5/__init__.py index cc667396a2622..815be385d7dd4 100644 --- a/onnxruntime/python/tools/transformers/models/t5/__init__.py +++ b/onnxruntime/python/tools/transformers/models/t5/__init__.py @@ -2,3 +2,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +import os.path +import sys + +sys.path.append(os.path.dirname(__file__)) + +transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..")) +if transformers_dir not in sys.path: + sys.path.append(transformers_dir) diff --git a/onnxruntime/python/tools/transformers/models/t5/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/t5/convert_to_onnx.py index 230885ab6c719..adf5206be8353 100755 --- a/onnxruntime/python/tools/transformers/models/t5/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/t5/convert_to_onnx.py @@ -8,14 +8,11 @@ import copy import logging import os -import sys import torch +from benchmark_helper import Precision, create_onnxruntime_session, prepare_environment, setup_logger from t5_helper import PRETRAINED_MT5_MODELS, PRETRAINED_T5_MODELS, T5Helper -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from benchmark_helper import Precision, create_onnxruntime_session, prepare_environment, setup_logger # noqa: E402 - logger = logging.getLogger("") diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py b/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py index fe415aa7680fc..19e6bba22dc1a 100644 --- a/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py +++ b/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py @@ -6,7 +6,6 @@ import logging import os -import sys import tempfile from pathlib import Path from typing import List, Optional, Union @@ -14,17 +13,15 @@ import numpy import onnx import torch +from io_binding_helper import TypeHelper +from onnx_model import OnnxModel from past_helper import PastKeyValuesHelper from t5_encoder import T5EncoderInputs +from torch_onnx_export_helper import torch_onnx_export from transformers import MT5Config, T5Config from onnxruntime import InferenceSession -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from io_binding_helper import TypeHelper # noqa: E402 -from onnx_model import OnnxModel # noqa: E402 -from torch_onnx_export_helper import torch_onnx_export # noqa: E402 - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py b/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py index e047d8adaf58e..fb61e970c1e0c 100644 --- a/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py +++ b/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py @@ -7,7 +7,6 @@ import logging import os import random -import sys import tempfile from pathlib import Path from typing import List, Union @@ -15,14 +14,12 @@ import numpy import onnx import torch +from onnx_model import OnnxModel +from torch_onnx_export_helper import torch_onnx_export from transformers import MT5Config, T5Config from onnxruntime import InferenceSession -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from onnx_model import OnnxModel # noqa: E402 -from torch_onnx_export_helper import torch_onnx_export # noqa: E402 - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py b/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py index 8870ca6f34780..fd6ea45ef8b7c 100644 --- a/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py +++ b/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py @@ -6,7 +6,6 @@ import logging import os 
-import sys import tempfile from pathlib import Path from typing import List, Optional, Union @@ -14,17 +13,15 @@ import numpy import onnx import torch +from onnx_model import OnnxModel from past_helper import PastKeyValuesHelper from t5_decoder import T5DecoderInit from t5_encoder import T5Encoder, T5EncoderInputs +from torch_onnx_export_helper import torch_onnx_export from transformers import MT5Config, T5Config from onnxruntime import InferenceSession -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from onnx_model import OnnxModel # noqa: E402 -from torch_onnx_export_helper import torch_onnx_export # noqa: E402 - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_helper.py b/onnxruntime/python/tools/transformers/models/t5/t5_helper.py index b1b494707a284..f7dc9db0e82c8 100755 --- a/onnxruntime/python/tools/transformers/models/t5/t5_helper.py +++ b/onnxruntime/python/tools/transformers/models/t5/t5_helper.py @@ -6,11 +6,13 @@ import logging import os -import sys from pathlib import Path from typing import Dict, List, Union import torch +from float16 import float_to_float16_max_diff +from onnx_model import OnnxModel +from optimizer import optimize_model from t5_decoder import T5Decoder, T5DecoderHelper, T5DecoderInit from t5_encoder import T5Encoder, T5EncoderHelper from t5_encoder_decoder_init import T5EncoderDecoderInit, T5EncoderDecoderInitHelper @@ -18,11 +20,6 @@ from onnxruntime import InferenceSession -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from float16 import float_to_float16_max_diff # noqa: E402 -from onnx_model import OnnxModel # noqa: E402 -from optimizer import optimize_model # noqa: E402 - logger = logging.getLogger(__name__) PRETRAINED_T5_MODELS = ["t5-small", "t5-base", "t5-large", "t5-3b", "t5-11b"] diff --git a/onnxruntime/python/tools/transformers/models/whisper/__init__.py b/onnxruntime/python/tools/transformers/models/whisper/__init__.py index 90c03fa9b9a24..815be385d7dd4 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/__init__.py +++ b/onnxruntime/python/tools/transformers/models/whisper/__init__.py @@ -2,7 +2,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- -import os +import os.path import sys sys.path.append(os.path.dirname(__file__)) + +transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..")) +if transformers_dir not in sys.path: + sys.path.append(transformers_dir) diff --git a/onnxruntime/python/tools/transformers/models/whisper/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/whisper/convert_to_onnx.py index eef35db20bbc5..288d79f62456a 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/whisper/convert_to_onnx.py @@ -8,17 +8,14 @@ import copy import logging import os -import sys import torch +from benchmark_helper import Precision, create_onnxruntime_session, prepare_environment, setup_logger from whisper_chain import chain_model from whisper_helper import PRETRAINED_WHISPER_MODELS, WhisperHelper from onnxruntime import quantization -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from benchmark_helper import Precision, create_onnxruntime_session, prepare_environment, setup_logger # noqa: E402 - logger = logging.getLogger("") PROVIDERS = { diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py index 1a20cbd101af3..7e2325c148efa 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_chain.py @@ -1,18 +1,12 @@ import logging import os -import sys import onnx +from benchmark_helper import Precision +from convert_generation import get_shared_initializers, update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha from onnx import TensorProto, helper from transformers import WhisperConfig -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from benchmark_helper import Precision # noqa: E402 -from convert_generation import ( # noqa: E402 - get_shared_initializers, - update_decoder_subgraph_share_buffer_and_use_decoder_masked_mha, -) - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py index d5cef8e3b1a0e..eca5ce3de15d3 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_decoder.py @@ -6,7 +6,6 @@ import logging import os -import sys import tempfile from pathlib import Path from typing import List, Optional, Union @@ -14,16 +13,14 @@ import numpy import onnx import torch +from io_binding_helper import TypeHelper +from models.t5.past_helper import PastKeyValuesHelper +from onnx_model import OnnxModel +from torch_onnx_export_helper import torch_onnx_export from transformers import WhisperConfig, file_utils from onnxruntime import InferenceSession -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from io_binding_helper import TypeHelper # noqa: E402 -from models.t5.past_helper import PastKeyValuesHelper # noqa: E402 -from onnx_model import OnnxModel # noqa: E402 -from torch_onnx_export_helper import torch_onnx_export # noqa: E402 - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py index 
937fe301f0d29..826d6e42c0775 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder.py @@ -6,7 +6,6 @@ import logging import os -import sys import tempfile from pathlib import Path from typing import List @@ -14,14 +13,12 @@ import numpy import onnx import torch +from onnx_model import OnnxModel +from torch_onnx_export_helper import torch_onnx_export from transformers import WhisperConfig from onnxruntime import InferenceSession -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from onnx_model import OnnxModel # noqa: E402 -from torch_onnx_export_helper import torch_onnx_export # noqa: E402 - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py index 094ddcebe35f1..a145178dbf37e 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py +++ b/onnxruntime/python/tools/transformers/models/whisper/whisper_encoder_decoder_init.py @@ -6,7 +6,6 @@ import logging import os -import sys import tempfile from pathlib import Path from typing import List, Optional @@ -14,17 +13,15 @@ import numpy import onnx import torch +from models.t5.past_helper import PastKeyValuesHelper +from onnx_model import OnnxModel +from torch_onnx_export_helper import torch_onnx_export from transformers import WhisperConfig from whisper_decoder import WhisperDecoderInit from whisper_encoder import WhisperEncoder, WhisperEncoderInputs from onnxruntime import InferenceSession -sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from models.t5.past_helper import PastKeyValuesHelper # noqa: E402 -from onnx_model import OnnxModel # noqa: E402 -from torch_onnx_export_helper import torch_onnx_export # noqa: E402 - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 38f7f8cd05f1d..4e064fa53bfc6 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -6,7 +6,6 @@ import logging import os -import sys from pathlib import Path import numpy @@ -18,8 +17,11 @@ from torch_onnx_export_helper import torch_onnx_export from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer, LxmertConfig, TransfoXLConfig -sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2")) -from gpt2_helper import PRETRAINED_GPT2_MODELS, GPT2ModelNoPastState, TFGPT2ModelNoPastState # noqa: E402 +from onnxruntime.transformers.models.gpt2.gpt2_helper import ( + PRETRAINED_GPT2_MODELS, + GPT2ModelNoPastState, + TFGPT2ModelNoPastState, +) os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert.py b/onnxruntime/python/tools/transformers/onnx_model_bert.py index e84a08afc66ce..995f8c6541b4c 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_bert.py +++ b/onnxruntime/python/tools/transformers/onnx_model_bert.py @@ -31,14 +31,6 @@ logger = getLogger(__name__) -class BertOptimizationOptions(FusionOptions): - """This class is deprecated""" - - def __init__(self, model_type): - logger.warning("BertOptimizationOptions is depreciated. 
Please use FusionOptions instead.") - super().__init__(model_type) - - class BertOnnxModel(OnnxModel): def __init__(self, model: ModelProto, num_heads: int = 0, hidden_size: int = 0): """Initialize BERT ONNX Model. diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py b/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py index a85e0cc2ba3f7..1229825fec3d4 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py +++ b/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py @@ -3,14 +3,10 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -import argparse # noqa: F401 import logging -import sys # noqa: F401 -from collections import deque # noqa: F401 -import numpy as np # noqa: F401 import onnx -from onnx import ModelProto, TensorProto, numpy_helper # noqa: F401 +from onnx import numpy_helper from onnx_model_bert_tf import BertOnnxModelTF logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert_tf.py b/onnxruntime/python/tools/transformers/onnx_model_bert_tf.py index 0ec4b5a007f90..b7891223e1dc2 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_bert_tf.py +++ b/onnxruntime/python/tools/transformers/onnx_model_bert_tf.py @@ -3,14 +3,11 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -import argparse # noqa: F401 import logging -import sys # noqa: F401 -from collections import deque # noqa: F401 import numpy as np import onnx -from onnx import ModelProto, TensorProto, helper, numpy_helper # noqa: F401 +from onnx import TensorProto, helper, numpy_helper from onnx_model_bert import BertOnnxModel logger = logging.getLogger(__name__) diff --git a/onnxruntime/test/python/transformers/model_loader.py b/onnxruntime/test/python/transformers/model_loader.py index fc66fdd92fe8d..2d871123ec8bb 100644 --- a/onnxruntime/test/python/transformers/model_loader.py +++ b/onnxruntime/test/python/transformers/model_loader.py @@ -5,7 +5,6 @@ # -------------------------------------------------------------------------- import os -import unittest # noqa: F401 from onnx import ModelProto, TensorProto, external_data_helper, load_model, numpy_helper from parity_utilities import find_transformers_source diff --git a/onnxruntime/test/python/transformers/parity_utilities.py b/onnxruntime/test/python/transformers/parity_utilities.py index 472b768e4aa94..d7f79304d2d2b 100644 --- a/onnxruntime/test/python/transformers/parity_utilities.py +++ b/onnxruntime/test/python/transformers/parity_utilities.py @@ -6,6 +6,7 @@ import argparse import os import sys +from pathlib import Path import numpy import torch @@ -51,6 +52,7 @@ def find_transformers_source(sub_dir_paths=[]): # noqa: B006 "transformers", *sub_dir_paths, ) + source_dir = os.path.normpath(source_dir) if os.path.exists(source_dir): if source_dir not in sys.path: sys.path.append(source_dir) @@ -66,13 +68,10 @@ def create_inputs( device=torch.device("cuda"), # noqa: B008 ): float_type = torch.float16 if float16 else torch.float32 - input = torch.normal(mean=0.0, std=10.0, size=(batch_size, sequence_length, hidden_size)).to(float_type).to(device) - return input + return torch.normal(mean=0.0, std=10.0, size=(batch_size, sequence_length, hidden_size)).to(float_type).to(device) def export_onnx(model, onnx_model_path, float16, hidden_size, device): - from pathlib import Path - Path(onnx_model_path).parent.mkdir(parents=True, 
exist_ok=True) input_hidden_states = create_inputs(hidden_size=hidden_size, float16=float16, device=device) diff --git a/onnxruntime/test/python/transformers/test_parity_decoder_attention.py b/onnxruntime/test/python/transformers/test_parity_decoder_attention.py index 15ac5a8b7dd4d..e870e7f95fcee 100644 --- a/onnxruntime/test/python/transformers/test_parity_decoder_attention.py +++ b/onnxruntime/test/python/transformers/test_parity_decoder_attention.py @@ -10,9 +10,7 @@ # license information. # ------------------------------------------------------------------------- -import math # noqa: F401 -import os # noqa: F401 -from typing import Dict, List, Optional, Tuple # noqa: F401 +from typing import List, Optional, Tuple import numpy import torch diff --git a/onnxruntime/test/python/transformers/test_parity_gelu.py b/onnxruntime/test/python/transformers/test_parity_gelu.py index dfafb9b7e7c5c..5edc0e78d7d5d 100644 --- a/onnxruntime/test/python/transformers/test_parity_gelu.py +++ b/onnxruntime/test/python/transformers/test_parity_gelu.py @@ -28,7 +28,7 @@ import unittest import torch -from parity_utilities import * # noqa: F403 +from parity_utilities import export_onnx, optimize_onnx, parse_arguments, run_parity from torch import nn @@ -36,7 +36,7 @@ class Gelu(nn.Module): def __init__(self, formula=4, fp32_gelu_op=False): super().__init__() self.formula = formula - self.fp32_gelu_op = True + self.fp32_gelu_op = fp32_gelu_op def gelu(self, x): if self.formula == 0: @@ -98,12 +98,12 @@ def run( # Do not re-use onnx file from previous test since weights of model are random. onnx_model_path = "./temp/gelu_{}_{}.onnx".format(formula, "fp16" if float16 else "fp32") - export_onnx(model, onnx_model_path, float16, hidden_size, device) # noqa: F405 + export_onnx(model, onnx_model_path, float16, hidden_size, device) if optimized: optimized_onnx_path = "./temp/gelu_{}_opt_{}.onnx".format(formula, "fp16" if float16 else "fp32") use_gpu = float16 and not fp32_gelu_op - optimize_onnx( # noqa: F405 + optimize_onnx( onnx_model_path, optimized_onnx_path, Gelu.get_fused_op(formula), @@ -115,7 +115,7 @@ def run( else: onnx_path = onnx_model_path - num_failure = run_parity( # noqa: F405 + num_failure = run_parity( model, onnx_path, batch_size, @@ -226,9 +226,7 @@ def test_cpu(self): def test_cuda(self): if not torch.cuda.is_available(): - import pytest - - pytest.skip("test requires GPU and torch+cuda") + self.skipTest("test requires GPU and torch+cuda") else: gpu = torch.device("cuda") for i in self.formula_to_test: @@ -236,7 +234,7 @@ def test_cuda(self): if __name__ == "__main__": - args, remaining_args = parse_arguments(namespace_filter=unittest) # noqa: F405 + args, remaining_args = parse_arguments(namespace_filter=unittest) TestGeluParity.verbose = args.log_verbose TestGeluParity.optimized = args.optimize diff --git a/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py b/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py index 1158f38e2887a..ad4117f997567 100644 --- a/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py +++ b/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py @@ -12,16 +12,22 @@ import os import random import unittest +from pathlib import Path import numpy import onnx import pytest import torch from onnx import helper -from parity_utilities import compare_outputs, create_ort_session, parse_arguments +from parity_utilities import compare_outputs, create_ort_session, 
find_transformers_source, parse_arguments from torch import nn from transformers.modeling_utils import Conv1D +if find_transformers_source(): + from onnx_model import OnnxModel +else: + from onnxruntime.transformers.onnx_model import OnnxModel + DEBUG_OUTPUTS = ["qk", "norm_qk", "softmax", "attn_weights"] @@ -206,8 +212,6 @@ def get_output_names(debug=False): def export_onnx(model, onnx_model_path, float16, hidden_size, num_attention_heads, debug, device): - from pathlib import Path - Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True) input_hidden_states, attention_mask, layer_past = create_inputs( @@ -254,8 +258,6 @@ def export_onnx(model, onnx_model_path, float16, hidden_size, num_attention_head def optimize_onnx(input_onnx_path, optimized_onnx_path, num_heads, debug): - from onnxruntime.transformers.onnx_model import OnnxModel - m = onnx.load(input_onnx_path) onnx_model = OnnxModel(m) @@ -513,9 +515,7 @@ def test_cpu(self): def test_cuda(self): if not torch.cuda.is_available(): - import pytest - - pytest.skip("test requires GPU and torch+cuda") + self.skipTest("test requires GPU and torch+cuda") else: gpu = torch.device("cuda") self.run_small(self.optimized, gpu, verbose=self.verbose) @@ -523,9 +523,7 @@ def test_cuda(self): @pytest.mark.slow def test_large_cuda(self): if not torch.cuda.is_available(): - import pytest - - pytest.skip("test requires GPU and torch+cuda") + self.skipTest("test requires GPU and torch+cuda") else: gpu = torch.device("cuda") self.run_large(self.optimized, gpu, verbose=self.verbose) diff --git a/onnxruntime/test/python/transformers/test_parity_layernorm.py b/onnxruntime/test/python/transformers/test_parity_layernorm.py index 648bfde7a8342..a75c28484e94d 100644 --- a/onnxruntime/test/python/transformers/test_parity_layernorm.py +++ b/onnxruntime/test/python/transformers/test_parity_layernorm.py @@ -9,10 +9,10 @@ import onnx import torch -from parity_utilities import * # noqa: F403 +from parity_utilities import export_onnx, find_transformers_source, optimize_onnx, parse_arguments, run_parity from torch import nn -if find_transformers_source(): # noqa: F405 +if find_transformers_source(): from onnx_model import OnnxModel else: from onnxruntime.transformers.onnx_model import OnnxModel @@ -150,14 +150,12 @@ def run( # Do not re-use onnx file from previous test since weights of model are random. 
onnx_model_path = "./temp/layer_norm_{}_formula{}.onnx".format("fp16" if float16 else "fp32", formula) - export_onnx(model, onnx_model_path, float16, hidden_size, device) # noqa: F405 + export_onnx(model, onnx_model_path, float16, hidden_size, device) if optimized: optimized_onnx_path = "./temp/layer_norm_{}_formula{}_opt.onnx".format("fp16" if float16 else "fp32", formula) if (not float16) or cast_fp16: - optimize_onnx( # noqa: F405 - onnx_model_path, optimized_onnx_path, expected_op=LayerNorm.get_fused_op(), verbose=verbose - ) + optimize_onnx(onnx_model_path, optimized_onnx_path, expected_op=LayerNorm.get_fused_op(), verbose=verbose) else: if cast_onnx_only: optimize_fp16_onnx_with_cast(onnx_model_path, optimized_onnx_path, epsilon=epsilon) @@ -168,7 +166,7 @@ def run( else: onnx_path = onnx_model_path - num_failure = run_parity( # noqa: F405 + num_failure = run_parity( model, onnx_path, batch_size, @@ -299,16 +297,14 @@ def test_cpu(self): def test_cuda(self): if not torch.cuda.is_available(): - import pytest - - pytest.skip("test requires GPU and torch+cuda") + self.skipTest("test requires GPU and torch+cuda") else: gpu = torch.device("cuda") self.run_one(self.optimized, gpu, hidden_size=self.hidden_size, run_extra_tests=True, verbose=self.verbose) if __name__ == "__main__": - args, remaining_args = parse_arguments(namespace_filter=unittest) # noqa: F405 + args, remaining_args = parse_arguments(namespace_filter=unittest) TestLayerNormParity.verbose = args.log_verbose TestLayerNormParity.optimized = args.optimize diff --git a/onnxruntime/test/python/transformers/test_parity_neox_attention.py b/onnxruntime/test/python/transformers/test_parity_neox_attention.py index e1942a47a1e67..8c8e871a854b0 100644 --- a/onnxruntime/test/python/transformers/test_parity_neox_attention.py +++ b/onnxruntime/test/python/transformers/test_parity_neox_attention.py @@ -14,6 +14,7 @@ import numpy as np import torch +from onnx import TensorProto, helper from torch import nn np.random.seed(0) @@ -29,8 +30,6 @@ def create_neox_attention_graph( qkv_bias, num_heads, ): - from onnx import TensorProto, helper - nodes = [ helper.make_node( "Attention", @@ -78,8 +77,6 @@ def create_neox_decoder_masked_self_attention_graph( qkv_bias, num_heads, ): - from onnx import TensorProto, helper - nodes = [ helper.make_node( "DecoderMaskedSelfAttention", diff --git a/onnxruntime/test/python/transformers/test_parity_t5_mha.py b/onnxruntime/test/python/transformers/test_parity_t5_mha.py index 409f8de5e1a80..c7fb398dde82e 100644 --- a/onnxruntime/test/python/transformers/test_parity_t5_mha.py +++ b/onnxruntime/test/python/transformers/test_parity_t5_mha.py @@ -16,6 +16,7 @@ import numpy as np import torch +from onnx import TensorProto, helper from torch import nn torch.manual_seed(0) @@ -30,8 +31,6 @@ def create_t5_mha_graph( use_past, is_static_kv, ): - from onnx import TensorProto, helper - use_present = not use_past if not is_static_kv and use_past: use_present = True @@ -163,8 +162,6 @@ def create_t5_decoder_masked_mha_graph( num_heads, is_cross_attention, ): - from onnx import TensorProto, helper - nodes = [ helper.make_node( "DecoderMaskedMultiHeadAttention", diff --git a/onnxruntime/test/python/transformers/test_whisper_timestamp_processor.py b/onnxruntime/test/python/transformers/test_whisper_timestamp_processor.py index 052c5ca264af9..66200af06f511 100644 --- a/onnxruntime/test/python/transformers/test_whisper_timestamp_processor.py +++ b/onnxruntime/test/python/transformers/test_whisper_timestamp_processor.py @@ -32,7 
+32,7 @@ def generate_dataset(self): def run_timestamp(self, provider: str): self.generate_model("-m openai/whisper-tiny --optimize_onnx --precision fp32 -l -e") [input_features, processor] = self.generate_dataset() - model_path = "./onnx_models/openai/whisper-tiny_beamsearch.onnx" + model_path = "./onnx_models/whisper-tiny_beamsearch.onnx" sess_options = SessionOptions() sess_options.log_severity_level = 4 sess = InferenceSession(model_path, sess_options, providers=[provider])
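Note (reviewer sketch, not part of the patch): the provider-selection fallback added to bert_test_data.py above is worth reusing anywhere an InferenceSession is created, since recent onnxruntime-gpu builds require an explicit providers list. A minimal self-contained sketch of the same logic; the helper name and model filename are hypothetical placeholders, while `get_available_providers` and the `providers` argument are the real onnxruntime API used in the patch:

```python
import onnxruntime


def create_session(model_path: str) -> onnxruntime.InferenceSession:
    # Prefer CUDA when this build exposes it, mirroring the fallback added
    # to create_and_save_test_data in this patch; otherwise stay on CPU.
    providers = (
        ["CUDAExecutionProvider", "CPUExecutionProvider"]
        if "CUDAExecutionProvider" in onnxruntime.get_available_providers()
        else ["CPUExecutionProvider"]
    )
    return onnxruntime.InferenceSession(model_path, providers=providers)


# Hypothetical usage:
# session = create_session("bert.onnx")
```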
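A second recurring change above replaces locally imported `pytest.skip` calls with unittest's native `self.skipTest` in the parity tests. A minimal sketch of the pattern with a hypothetical test class (assuming torch is installed); `skipTest` reports the skip through the regular unittest runner and drops the runtime dependency on pytest:

```python
import unittest

import torch


class GpuOnlyTest(unittest.TestCase):
    def test_cuda(self):
        # Same guard as in test_parity_gelu.py / test_parity_layernorm.py:
        # skip cleanly when no CUDA device is available.
        if not torch.cuda.is_available():
            self.skipTest("test requires GPU and torch+cuda")
        self.assertTrue(torch.tensor([1.0], device="cuda").is_cuda)
```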