From bbf7d821950b7c81f07af34e0faa64bbcd9e4e2b Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Thu, 12 Sep 2024 14:04:44 -0600
Subject: [PATCH 1/9] feat(exporters/onnx): Add GraniteOnnxConfig and task
 support list

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/exporters/onnx/model_configs.py | 10 ++++++++++
 optimum/exporters/tasks.py              |  7 +++++++
 2 files changed, 17 insertions(+)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 9e57128c27..51dab27266 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -298,6 +298,16 @@ class GemmaOnnxConfig(LlamaOnnxConfig):
     pass
 
 
+class GraniteOnnxConfig(TextDecoderOnnxConfig):
+    # GG TODO: Bump past 4.44.2 once the next release is out
+    MIN_TRANSFORMERS_VERSION = version.parse("4.44.2")
+    DEFAULT_ONNX_OPSET = 14  # Granite follows Llama's default
+
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+
+
 class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
     DEFAULT_ONNX_OPSET = 14  # Phi now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
index a489f34fb0..fdc8bfcb53 100644
--- a/optimum/exporters/tasks.py
+++ b/optimum/exporters/tasks.py
@@ -915,6 +915,13 @@ class TasksManager:
             "text-classification",
             onnx="LlamaOnnxConfig",
         ),
+        "granite": supported_tasks_mapping(
+            "feature-extraction",
+            "feature-extraction-with-past",
+            "text-generation",
+            "text-generation-with-past",
+            onnx="GraniteOnnxConfig",
+        ),
         "pegasus": supported_tasks_mapping(
             "feature-extraction",
             "feature-extraction-with-past",

From 63f66fcd7ab06923d953415f6930c6e5d61f4798 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Thu, 12 Sep 2024 14:05:24 -0600
Subject: [PATCH 2/9] feat: Add granite's normalized config for inference

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/utils/normalized_config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py
index 81207b7649..9ceed24c2d 100644
--- a/optimum/utils/normalized_config.py
+++ b/optimum/utils/normalized_config.py
@@ -281,6 +281,7 @@ class NormalizedConfigManager:
         "xlm-roberta": NormalizedTextConfig,
         "yolos": NormalizedVisionConfig,
         "qwen2": NormalizedTextConfig,
+        "granite": NormalizedTextConfigWithGQA,
     }
 
     @classmethod

From 767fe05ad82f1f1ae3a29676fb4081bb1e0df820 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Thu, 12 Sep 2024 15:54:40 -0600
Subject: [PATCH 3/9] feat(onnx opt): Add onnx optimization support for granite

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/onnxruntime/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/onnxruntime/utils.py b/optimum/onnxruntime/utils.py
index 128e2406f1..9e92e0bd32 100644
--- a/optimum/onnxruntime/utils.py
+++ b/optimum/onnxruntime/utils.py
@@ -128,6 +128,7 @@ class ORTConfigManager:
         "gpt-neo": "gpt2",
         "gpt-neox": "gpt2",
         "gptj": "gpt2",
+        "granite": "gpt2",
         # longt5 with O4 results in segmentation fault
         "longt5": "bert",
         "llama": "gpt2",

From 1ab66dfee6a4da97edc7039921f2b48097a291bf Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 8 Oct 2024 12:58:48 -0600
Subject: [PATCH 4/9] fix(onnx/granite): Use LlamaOnnxConfig as the base for
 GraniteOnnxConfig

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/exporters/onnx/model_configs.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 51dab27266..46c127acd9 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -298,14 +298,8 @@ class GemmaOnnxConfig(LlamaOnnxConfig):
     pass
 
 
-class GraniteOnnxConfig(TextDecoderOnnxConfig):
-    # GG TODO: Bump past 4.44.2 once the next release is out
-    MIN_TRANSFORMERS_VERSION = version.parse("4.44.2")
-    DEFAULT_ONNX_OPSET = 14  # Granite follows Llama's default
-
-    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
-    DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
-    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+class GraniteOnnxConfig(LlamaOnnxConfig):
+    MIN_TRANSFORMERS_VERSION = version.parse("4.45.0")
 
 
 class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):

From 4fc9ead2977a322b97e0fb1da78397a966992524 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 8 Oct 2024 13:00:11 -0600
Subject: [PATCH 5/9] fix(onnxruntime): Add "granite" to list of model types
 with grouped attention

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/onnxruntime/modeling_decoder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
index 984d7f22eb..8f1d062221 100644
--- a/optimum/onnxruntime/modeling_decoder.py
+++ b/optimum/onnxruntime/modeling_decoder.py
@@ -340,7 +340,7 @@ def prepare_past_key_values(
             if self.model_type == "gemma":
                 num_attention_heads = self.normalized_config.num_key_value_heads
                 embed_size_per_head = self.normalized_config.head_dim
-            elif self.model_type in {"mistral", "llama", "qwen2"}:
+            elif self.model_type in {"mistral", "llama", "qwen2", "granite"}:
                 num_attention_heads = self.normalized_config.num_key_value_heads
             else:
                 num_attention_heads = self.normalized_config.num_attention_heads

From e6e56641f0f2f99169a8604723149ad57a3f79ab Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 8 Oct 2024 14:51:57 -0600
Subject: [PATCH 6/9] fix: Add granite to the list of models that require
 position_ids

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/exporters/onnx/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py
index 56249bbf5c..19e24f8874 100644
--- a/optimum/exporters/onnx/utils.py
+++ b/optimum/exporters/onnx/utils.py
@@ -86,6 +86,7 @@
     "phi",
     "phi3",
     "qwen2",
+    "granite",
 }
 
 

From 551c47ba985e315dd5527e81a2fc2074caaba4d9 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 22 Oct 2024 16:17:17 -0600
Subject: [PATCH 7/9] fix(granite): Add MIN_TORCH_VERSION for recently fixed
 torch bug

See: https://github.com/huggingface/optimum/pull/2043#issuecomment-2427975461

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 optimum/exporters/onnx/model_configs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 46c127acd9..cc752779d3 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -300,6 +300,7 @@ class GemmaOnnxConfig(LlamaOnnxConfig):
 
 class GraniteOnnxConfig(LlamaOnnxConfig):
     MIN_TRANSFORMERS_VERSION = version.parse("4.45.0")
+    MIN_TORCH_VERSION = version.parse("2.5.0")
 
 
 class PhiOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):

From 9529b51c0060ed7340b1470e7a99e869158688d5 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Tue, 22 Oct 2024 16:17:45 -0600
Subject: [PATCH 8/9] test(granite): Add tiny random granite test for onnx
 exporter

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 tests/exporters/exporters_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index c8a33b0be3..ccccb5510b 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -100,6 +100,7 @@
     "gpt-neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "gpt-neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gptj": "hf-internal-testing/tiny-random-GPTJModel",
+    "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM",
     "groupvit": "hf-internal-testing/tiny-random-groupvit",
     "ibert": "hf-internal-testing/tiny-random-IBertModel",
     "imagegpt": "hf-internal-testing/tiny-random-ImageGPTModel",

From 52da69fbd821d367aef20344fa8e834b95256bed Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Fri, 25 Oct 2024 11:31:42 -0600
Subject: [PATCH 9/9] test(onnxruntime): Add granite to onnxruntime tests

Branch: OnnxGranite

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
---
 tests/onnxruntime/test_modeling.py           | 1 +
 tests/onnxruntime/utils_onnxruntime_tests.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py
index 597eb581e2..a335e01447 100644
--- a/tests/onnxruntime/test_modeling.py
+++ b/tests/onnxruntime/test_modeling.py
@@ -2324,6 +2324,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):
         "gpt_neo",
         "gpt_neox",
         "gptj",
+        "granite",
         "llama",
         "mistral",
         "mpt",
diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py
index e3d5423785..9f200e69b3 100644
--- a/tests/onnxruntime/utils_onnxruntime_tests.py
+++ b/tests/onnxruntime/utils_onnxruntime_tests.py
@@ -104,6 +104,7 @@
     "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gptj": "hf-internal-testing/tiny-random-GPTJForCausalLM",
+    "granite": "hf-internal-testing/tiny-random-GraniteForCausalLM",
     "groupvit": "hf-internal-testing/tiny-random-groupvit",
     "hubert": "hf-internal-testing/tiny-random-HubertModel",
     "ibert": "hf-internal-testing/tiny-random-IBertModel",