From bbf7d821950b7c81f07af34e0faa64bbcd9e4e2b Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Thu, 12 Sep 2024 14:04:44 -0600 Subject: [PATCH 1/9] feat(exporters/onnx): Add GraniteOnnxConfig and task support list Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/exporters/onnx/model_configs.py | 10 ++++++++++ optimum/exporters/tasks.py | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 9e57128c27..51dab27266 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -298,6 +298,16 @@ class GemmaOnnxConfig(LlamaOnnxConfig): pass +class GraniteOnnxConfig(TextDecoderOnnxConfig): + # GG TODO: Bump past 4.44.2 once the next release is out + MIN_TRANSFORMERS_VERSION = version.parse("4.44.2") + DEFAULT_ONNX_OPSET = 14 # Granite follows Llama's default + + DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator) + DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + + class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): DEFAULT_ONNX_OPSET = 14 # Phi now uses F.scaled_dot_product_attention by default for torch>=2.1.1. NORMALIZED_CONFIG_CLASS = NormalizedTextConfig diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index a489f34fb0..fdc8bfcb53 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -915,6 +915,13 @@ class TasksManager: "text-classification", onnx="LlamaOnnxConfig", ), + "granite": supported_tasks_mapping( + "feature-extraction", + "feature-extraction-with-past", + "text-generation", + "text-generation-with-past", + onnx="GraniteOnnxConfig", + ), "pegasus": supported_tasks_mapping( "feature-extraction", "feature-extraction-with-past", From 63f66fcd7ab06923d953415f6930c6e5d61f4798 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Thu, 12 Sep 2024 14:05:24 -0600 Subject: [PATCH 2/9] feat: Add granite's normalized config for inference Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/utils/normalized_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index 81207b7649..9ceed24c2d 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -281,6 +281,7 @@ class NormalizedConfigManager: "xlm-roberta": NormalizedTextConfig, "yolos": NormalizedVisionConfig, "qwen2": NormalizedTextConfig, + "granite": NormalizedTextConfigWithGQA, } @classmethod From 767fe05ad82f1f1ae3a29676fb4081bb1e0df820 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Thu, 12 Sep 2024 15:54:40 -0600 Subject: [PATCH 3/9] feat(onnx opt): Add onnx optimization support for granite Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/onnxruntime/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/onnxruntime/utils.py b/optimum/onnxruntime/utils.py index 128e2406f1..9e92e0bd32 100644 --- a/optimum/onnxruntime/utils.py +++ b/optimum/onnxruntime/utils.py @@ -128,6 +128,7 @@ class ORTConfigManager: "gpt-neo": "gpt2", "gpt-neox": "gpt2", "gptj": "gpt2", + "granite": "gpt2", # longt5 with O4 results in segmentation fault "longt5": "bert", "llama": "gpt2", From 1ab66dfee6a4da97edc7039921f2b48097a291bf Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 8 Oct 2024 12:58:48 -0600 Subject: [PATCH 4/9] fix(onnx/granite): Use LlamaOnnxConfig as the base for GraniteOnnxConfig Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/exporters/onnx/model_configs.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 51dab27266..46c127acd9 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -298,14 +298,8 @@ class GemmaOnnxConfig(LlamaOnnxConfig): pass -class GraniteOnnxConfig(TextDecoderOnnxConfig): - # GG TODO: Bump past 4.44.2 once the next release is out - MIN_TRANSFORMERS_VERSION = version.parse("4.44.2") - DEFAULT_ONNX_OPSET = 14 # Granite follows Llama's default - - DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator) - DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator - NORMALIZED_CONFIG_CLASS = NormalizedTextConfig +class GraniteOnnxConfig(LlamaOnnxConfig): + MIN_TRANSFORMERS_VERSION = version.parse("4.45.0") class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): From 4fc9ead2977a322b97e0fb1da78397a966992524 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 8 Oct 2024 13:00:11 -0600 Subject: [PATCH 5/9] fix(onnxruntime): Add "granite" to list of model types with grouped attention Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/onnxruntime/modeling_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py index 984d7f22eb..8f1d062221 100644 --- a/optimum/onnxruntime/modeling_decoder.py +++ b/optimum/onnxruntime/modeling_decoder.py @@ -340,7 +340,7 @@ def prepare_past_key_values( if self.model_type == "gemma": num_attention_heads = self.normalized_config.num_key_value_heads embed_size_per_head = self.normalized_config.head_dim - elif self.model_type in {"mistral", "llama", "qwen2"}: + elif self.model_type in {"mistral", "llama", "qwen2", "granite"}: num_attention_heads = self.normalized_config.num_key_value_heads else: num_attention_heads = self.normalized_config.num_attention_heads From e6e56641f0f2f99169a8604723149ad57a3f79ab Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 8 Oct 2024 14:51:57 -0600 Subject: [PATCH 6/9] fix: Add granite to the list of models that require position_ids Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/exporters/onnx/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py index 56249bbf5c..19e24f8874 100644 --- a/optimum/exporters/onnx/utils.py +++ b/optimum/exporters/onnx/utils.py @@ -86,6 +86,7 @@ "phi", "phi3", "qwen2", + "granite", } From 551c47ba985e315dd5527e81a2fc2074caaba4d9 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 22 Oct 2024 16:17:17 -0600 Subject: [PATCH 7/9] fix(granite): Add MIN_TORCH_VERSION for recently fixed torch bug https://github.com/huggingface/optimum/pull/2043#issuecomment-2427975461 Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- optimum/exporters/onnx/model_configs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 46c127acd9..cc752779d3 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -300,6 +300,7 @@ class GemmaOnnxConfig(LlamaOnnxConfig): class GraniteOnnxConfig(LlamaOnnxConfig): MIN_TRANSFORMERS_VERSION = version.parse("4.45.0") + MIN_TORCH_VERSION = version.parse("2.5.0") class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): From 9529b51c0060ed7340b1470e7a99e869158688d5 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 22 Oct 2024 16:17:45 -0600 Subject: [PATCH 8/9] test(granite): Add tiny random granite test for onnx exporter Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- tests/exporters/exporters_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index c8a33b0be3..ccccb5510b 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -100,6 +100,7 @@ "gpt-neo": "hf-internal-testing/tiny-random-GPTNeoModel", "gpt-neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", "gptj": "hf-internal-testing/tiny-random-GPTJModel", + "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM", "groupvit": "hf-internal-testing/tiny-random-groupvit", "ibert": "hf-internal-testing/tiny-random-IBertModel", "imagegpt": "hf-internal-testing/tiny-random-ImageGPTModel", From 52da69fbd821d367aef20344fa8e834b95256bed Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Fri, 25 Oct 2024 11:31:42 -0600 Subject: [PATCH 9/9] tests(onnxruntime): Add granite to onnxruntime tests Branch: OnnxGranite Signed-off-by: Gabe Goodhart --- tests/onnxruntime/test_modeling.py | 1 + tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 597eb581e2..a335e01447 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2324,6 +2324,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "gpt_neo", "gpt_neox", "gptj", + "granite", "llama", "mistral", "mpt", diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index e3d5423785..9f200e69b3 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -104,6 +104,7 @@ "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel", "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", "gptj": "hf-internal-testing/tiny-random-GPTJForCausalLM", + "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM", "groupvit": "hf-internal-testing/tiny-random-groupvit", "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-IBertModel",