Add transformers 4.36 tests (#2085)
* add transformers 4.36 tests

* add test depending on transformers version

* add min transformers required version for gemma

* update macos

* fix whisper test

* add opt

* fix mpt

* add comment

* add granite test when supported by transformers
echarlaix authored Nov 18, 2024
1 parent e8b0332 commit c513437
Showing 4 changed files with 33 additions and 22 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/test_onnxruntime.yml
@@ -18,8 +18,10 @@ jobs:
       fail-fast: false
       matrix:
         transformers-version: ["latest"]
-        os: [ubuntu-20.04, windows-2019, macos-13]
+        os: [ubuntu-20.04, windows-2019, macos-15]
         include:
+          - transformers-version: "4.36.*"
+            os: ubuntu-20.04
           - transformers-version: "4.45.*"
             os: ubuntu-20.04

4 changes: 3 additions & 1 deletion optimum/exporters/onnx/model_configs.py
@@ -295,7 +295,7 @@ class Qwen2OnnxConfig(LlamaOnnxConfig):
 class GemmaOnnxConfig(LlamaOnnxConfig):
     DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, GemmaDummyPastKeyValuesGenerator)
     DUMMY_PKV_GENERATOR_CLASS = GemmaDummyPastKeyValuesGenerator
-    pass
+    MIN_TRANSFORMERS_VERSION = version.parse("4.38.0")


 class GraniteOnnxConfig(LlamaOnnxConfig):
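Note: MIN_TRANSFORMERS_VERSION declares the oldest transformers release an ONNX config supports. A minimal sketch of how such a guard can be enforced at export time (the helper below is illustrative, not Optimum's actual code):

import transformers
from packaging import version

def ensure_min_transformers(config_cls):
    # Compare the installed transformers release against the config's declared minimum.
    required = getattr(config_cls, "MIN_TRANSFORMERS_VERSION", None)
    if required is not None and version.parse(transformers.__version__) < required:
        raise RuntimeError(
            f"{config_cls.__name__} requires transformers >= {required}, "
            f"found {transformers.__version__}."
        )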
@@ -348,6 +348,8 @@ def patch_model_for_export(
 class MPTOnnxConfig(TextDecoderOnnxConfig):
     # MPT does not require position_ids input.
     DEFAULT_ONNX_OPSET = 13
+    # TODO: fix inference for transformers < v4.41 for beam_search > 1
+    MIN_TRANSFORMERS_VERSION = version.parse("4.41.0")
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(
         num_attention_heads="n_heads", hidden_size="d_model", num_layers="n_layers"
     )
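Note: NormalizedTextConfig.with_args maps MPT's nonstandard config attribute names (n_heads, d_model, n_layers) onto the generic names the exporter queries (num_attention_heads, hidden_size, num_layers). A rough sketch of the idea, assuming a simple attribute-mapping wrapper (not Optimum's actual implementation):

class NormalizedConfig:
    ATTRIBUTE_MAP = {}

    def __init__(self, config):
        self._config = config

    def __getattr__(self, name):
        # Resolve the generic attribute name to the model-specific one, if mapped.
        return getattr(self._config, self.ATTRIBUTE_MAP.get(name, name))

    @classmethod
    def with_args(cls, **mapping):
        # Build a subclass whose attribute map includes the extra renames.
        return type(cls.__name__, (cls,), {"ATTRIBUTE_MAP": {**cls.ATTRIBUTE_MAP, **mapping}})

Under this sketch, NormalizedConfig.with_args(num_attention_heads="n_heads")(mpt_config).num_attention_heads transparently reads mpt_config.n_heads.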
10 changes: 5 additions & 5 deletions setup.py
@@ -54,7 +54,7 @@
         "datasets>=1.2.1",
         "evaluate",
         "protobuf>=3.20.1",
-        "transformers<4.47.0",
+        "transformers>=4.36,<4.47.0",
     ],
     "onnxruntime-gpu": [
         "onnx",
@@ -63,19 +63,19 @@
         "evaluate",
         "protobuf>=3.20.1",
         "accelerate",  # ORTTrainer requires it.
-        "transformers<4.47.0",
+        "transformers>=4.36,<4.47.0",
     ],
     "exporters": [
         "onnx",
         "onnxruntime",
         "timm",
-        "transformers<4.47.0",
+        "transformers>=4.36,<4.47.0",
     ],
     "exporters-gpu": [
         "onnx",
         "onnxruntime-gpu",
         "timm",
-        "transformers<4.47.0",
+        "transformers>=4.36,<4.47.0",
     ],
     "exporters-tf": [
         "tensorflow>=2.4,<=2.12.1",
@@ -86,7 +86,7 @@
         "h5py",
         "numpy<1.24.0",
         "datasets<=2.16",
-        "transformers>=4.26,<4.38",
+        "transformers>=4.36,<4.38",
     ],
     "diffusers": ["diffusers"],
     "intel": "optimum-intel>=1.18.0",
37 changes: 22 additions & 15 deletions tests/onnxruntime/test_modeling.py
@@ -2318,21 +2318,28 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):
         "bloom",
         "codegen",
         "falcon",
-        "gemma",
         "gpt2",
         "gpt_bigcode",
         "gpt_neo",
         "gpt_neox",
         "gptj",
-        "granite",
         "llama",
         "mistral",
-        "mpt",
+        "opt",
     ]

-    if check_if_transformers_greater("4.40"):
-        SUPPORTED_ARCHITECTURES.extend(["gemma", "phi3", "qwen2"])
+    if check_if_transformers_greater("4.37"):
+        SUPPORTED_ARCHITECTURES.append("qwen2")
+
+    if check_if_transformers_greater("4.38"):
+        SUPPORTED_ARCHITECTURES.append("gemma")
+
+    # TODO: fix "mpt" for which inference fails for transformers < v4.41
+    if check_if_transformers_greater("4.41"):
+        SUPPORTED_ARCHITECTURES.extend(["phi3", "mpt"])
+
+    if check_if_transformers_greater("4.45"):
+        SUPPORTED_ARCHITECTURES.append("granite")

     FULL_GRID = {
         "model_arch": SUPPORTED_ARCHITECTURES,
@@ -2445,7 +2452,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach
         transformers_model = AutoModelForCausalLM.from_pretrained(model_id)
         transformers_model = transformers_model.eval()
         tokenizer = get_preprocessor(model_id)
-        tokens = tokenizer("This is a sample output", return_tensors="pt")
+        tokens = tokenizer("This is a sample input", return_tensors="pt")
         position_ids = None
         if model_arch.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS:
             input_shape = tokens["input_ids"].shape
@@ -2467,7 +2474,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach
         # Compare batched generation.
         tokenizer.pad_token_id = tokenizer.eos_token_id
         tokenizer.padding_side = "left"
-        tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True)
+        tokens = tokenizer(["This is", "This is a sample input"], return_tensors="pt", padding=True)
         onnx_model.generation_config.eos_token_id = None
         transformers_model.generation_config.eos_token_id = None
         onnx_model.config.eos_token_id = None
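The batched comparison pads on the left because, for decoder-only models, right padding would leave pad tokens between the prompt and the generated continuation. A self-contained illustration (gpt2 is used here only as a stand-in checkpoint):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer.pad_token_id = tokenizer.eos_token_id  # gpt2 has no pad token by default
tokenizer.padding_side = "left"  # generation continues from the rightmost real tokens
batch = tokenizer(["This is", "This is a sample input"], return_tensors="pt", padding=True)
outputs = model.generate(**batch, max_new_tokens=5)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))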
@@ -4598,14 +4605,14 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str):
         )

         self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv))
-        self.assertEqual(
-            outputs_model_with_pkv.shape[1],
-            self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1,
-        )
-        self.assertEqual(
-            outputs_model_without_pkv.shape[1],
-            self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1,
-        )
+
+        if model_arch == "whisper" and check_if_transformers_greater("4.43"):
+            gen_length = self.GENERATION_LENGTH + 2
+        else:
+            gen_length = self.GENERATION_LENGTH + 1
+
+        self.assertEqual(outputs_model_with_pkv.shape[1], gen_length)
+        self.assertEqual(outputs_model_without_pkv.shape[1], gen_length)

         self.GENERATION_LENGTH = generation_length
         if os.environ.get("TEST_LEVEL", 0) == "1":
