diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
index d2cad279ac..7f7e8aec9f 100644
--- a/.github/workflows/test_onnxruntime.yml
+++ b/.github/workflows/test_onnxruntime.yml
@@ -20,7 +20,7 @@ jobs:
         transformers-version: ["latest"]
         os: [ubuntu-20.04, windows-2019, macos-15]
         include:
-          - transformers-version: "4.36.*"
+          - transformers-version: "4.41.0"
             os: ubuntu-20.04
           - transformers-version: "4.45.*"
             os: ubuntu-20.04
@@ -56,11 +56,4 @@ jobs:
       - name: Test with pytest (in series)
         working-directory: tests
         run: |
-          pytest onnxruntime -m "run_in_series" --durations=0 -vvvv -s
-
-      - name: Test with pytest (in parallel)
-        env:
-          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-        working-directory: tests
-        run: |
-          pytest onnxruntime -m "not run_in_series" --durations=0 -vvvv -s -n auto
+          pytest onnxruntime -k test_compare_to_transformers_ort
diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py
index f1d9cb9d00..f02d9eca5e 100644
--- a/tests/onnxruntime/test_modeling.py
+++ b/tests/onnxruntime/test_modeling.py
@@ -2315,18 +2315,8 @@ def test_compare_to_io_binding(self, model_arch):
 
 
 class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):
     SUPPORTED_ARCHITECTURES = [
-        "bloom",
-        "codegen",
-        "falcon",
-        "gpt2",
-        "gpt_bigcode",
-        "gpt_neo",
-        "gpt_neox",
-        "gptj",
-        "llama",
-        "mistral",
+        "mpt",
-        "opt",
     ]
 
     if check_if_transformers_greater("4.37"):
@@ -2420,7 +2410,7 @@ def test_merge_from_onnx_and_save(self, model_arch):
         self.assertNotIn(ONNX_WEIGHTS_NAME, folder_contents)
 
     @parameterized.expand(grid_parameters({**FULL_GRID, "num_beams": [1, 4]}))
-    def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, num_beams: int):
+    def test_compare_to_transformers_ort(self, test_name: str, model_arch: str, use_cache: bool, num_beams: int):
         use_io_binding = None
         if use_cache is False:
             use_io_binding = False
@@ -4602,14 +4592,14 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str):
         )
 
         self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv))
-        self.assertEqual(
-            outputs_model_with_pkv.shape[1],
-            self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1,
-        )
-        self.assertEqual(
-            outputs_model_without_pkv.shape[1],
-            self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1,
-        )
+
+        if model_arch == "whisper" and check_if_transformers_greater("4.43"):
+            gen_length = self.GENERATION_LENGTH + 2
+        else:
+            gen_length = self.GENERATION_LENGTH + 1
+
+        self.assertEqual(outputs_model_with_pkv.shape[1], gen_length)
+        self.assertEqual(outputs_model_without_pkv.shape[1], gen_length)
 
         self.GENERATION_LENGTH = generation_length
         if os.environ.get("TEST_LEVEL", 0) == "1":