From ff26d625287560ecd46321185525dbc84e885a6f Mon Sep 17 00:00:00 2001 From: Raghu Ramarao Date: Tue, 1 Oct 2024 20:20:10 +0530 Subject: [PATCH 01/15] Decision Transformer to ONNX V0.1 --- docs/source/exporters/onnx/overview.mdx | 1 + optimum/exporters/onnx/base.py | 8 +++++++ optimum/exporters/onnx/model_configs.py | 18 ++++++++++++++ optimum/exporters/tasks.py | 10 ++++++++ optimum/utils/__init__.py | 1 + optimum/utils/input_generators.py | 31 +++++++++++++++++++++++++ optimum/utils/normalized_config.py | 9 +++++++ tests/exporters/exporters_utils.py | 1 + 8 files changed, 79 insertions(+) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 747e1396fb..2eaada7dad 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -36,6 +36,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - Data2VecVision - Deberta - Deberta-v2 +- Decision Transformer - Deit - Detr - DistilBert diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 8cd94194ff..ccf3a3f2bd 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -173,6 +173,14 @@ class OnnxConfig(ExportConfig, ABC): "end_logits": {0: "batch_size", 1: "sequence_length"}, } ), + "reinforcement-learning": OrderedDict( + { + "state_preds": {0: "batch_size", 1: "sequence_length"}, + "action_preds": {0: "batch_size", 1: "sequence_length"}, + "return_preds": {0: "batch_size", 1: "sequence_length"}, + "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, + } + ), "semantic-segmentation": OrderedDict({"logits": {0: "batch_size", 1: "num_labels", 2: "height", 3: "width"}}), "text2text-generation": OrderedDict({"logits": {0: "batch_size", 1: "decoder_sequence_length"}}), "text-classification": OrderedDict({"logits": {0: "batch_size"}}), diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 36963a986d..36fc5d1e77 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -27,6 +27,7 @@ DummyAudioInputGenerator, DummyCodegenDecoderTextInputGenerator, DummyDecoderTextInputGenerator, + DummyDecisionTransformerInputGenerator, DummyEncodecInputGenerator, DummyInputGenerator, DummyIntGenerator, @@ -256,6 +257,23 @@ class ImageGPTOnnxConfig(GPT2OnnxConfig): pass +class DecisionTransformerOnnxConfig(GPT2OnnxConfig): + DUMMY_INPUT_GENERATOR_CLASSES = ( + DummyDecisionTransformerInputGenerator, + ) + + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + dynamic_axis = {0: "batch_size", 1: "sequence_length"} + + return { + 'actions': dynamic_axis, + 'timesteps': dynamic_axis, + 'attention_mask': dynamic_axis, + 'returns_to_go': dynamic_axis, + 'states': dynamic_axis, + } + class GPTNeoOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): DEFAULT_ONNX_OPSET = 14 NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_attention_heads="num_heads") diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index a489f34fb0..1b3236cf65 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -217,6 +217,7 @@ class TasksManager: "multiple-choice": "AutoModelForMultipleChoice", "object-detection": "AutoModelForObjectDetection", "question-answering": "AutoModelForQuestionAnswering", + "reinforcement-learning": "AutoModel", "semantic-segmentation": "AutoModelForSemanticSegmentation", "text-to-audio": ("AutoModelForTextToSpectrogram", "AutoModelForTextToWaveform"), 
"text-generation": "AutoModelForCausalLM", @@ -562,6 +563,12 @@ class TasksManager: onnx="DebertaV2OnnxConfig", tflite="DebertaV2TFLiteConfig", ), + "decision-transformer": supported_tasks_mapping( + "feature-extraction", + "feature-extraction-with-past", + "reinforcement-learning", + onnx="DecisionTransformerOnnxConfig", + ), "deit": supported_tasks_mapping( "feature-extraction", "image-classification", @@ -2061,6 +2068,9 @@ def get_model_from_task( if original_task == "automatic-speech-recognition" or task == "automatic-speech-recognition": if original_task == "auto" and config.architectures is not None: model_class_name = config.architectures[0] + elif original_task == "reinforcement-learning" or task == "reinforcement-learning": + if config.architectures is not None: + model_class_name = config.architectures[0] if library_name == "diffusers": config = DiffusionPipeline.load_config(model_name_or_path, **kwargs) diff --git a/optimum/utils/__init__.py b/optimum/utils/__init__.py index 5d5044e63e..157f18ba68 100644 --- a/optimum/utils/__init__.py +++ b/optimum/utils/__init__.py @@ -49,6 +49,7 @@ DummyAudioInputGenerator, DummyBboxInputGenerator, DummyCodegenDecoderTextInputGenerator, + DummyDecisionTransformerInputGenerator, DummyDecoderTextInputGenerator, DummyEncodecInputGenerator, DummyInputGenerator, diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index dac14a3811..e6f2059a1f 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -507,6 +507,37 @@ class DummyDecoderTextInputGenerator(DummyTextInputGenerator): ) +class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator): + """ + Generates dummy decision transformer inputs. + """ + + SUPPORTED_INPUT_NAMES = ( + 'actions', + 'timesteps', + 'attention_mask', + 'returns_to_go', + 'states', + ) + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + + if input_name == "states": + shape = [self.batch_size, self.normalized_config.config.state_dim] + elif input_name == "actions": + shape = [self.batch_size, self.normalized_config.config.act_dim] + elif input_name == 'returns_to_go': + shape = [self.batch_size, 1] + elif input_name == 'timesteps': + shape = [self.normalized_config.config.state_dim, self.batch_size] + max_value = self.normalized_config.config.max_ep_len + return self.random_int_tensor(shape=shape, max_value = max_value, framework=framework, dtype=int_dtype) + elif input_name == "attention_mask": + shape = [self.batch_size, self.normalized_config.config.state_dim] + + return self.random_float_tensor(shape, min_value=-1., max_value=1., framework=framework, dtype=float_dtype) + + class DummySeq2SeqDecoderTextInputGenerator(DummyDecoderTextInputGenerator): SUPPORTED_INPUT_NAMES = ( "decoder_input_ids", diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index 81207b7649..94425fa264 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -85,6 +85,14 @@ class NormalizedTextConfig(NormalizedConfig): EOS_TOKEN_ID = "eos_token_id" +class NormalizedDecisionTransformerConfig(NormalizedConfig): + # REFERENCE: https://huggingface.co/docs/transformers/model_doc/decision_transformer + STATE_DIM = "state_dim" + ACT_DIM = "act_dim" + MAX_EP_LEN = "max_ep_len" + HIDDEN_SIZE = "hidden_size" + + class NormalizedTextConfigWithGQA(NormalizedTextConfig): NUM_KEY_VALUE_HEADS = "num_key_value_heads" @@ -236,6 +244,7 @@ class 
NormalizedConfigManager: "cvt": NormalizedVisionConfig, "deberta": NormalizedTextConfig, "deberta-v2": NormalizedTextConfig, + "decision-transformer": NormalizedDecisionTransformerConfig, "deit": NormalizedVisionConfig, "distilbert": NormalizedTextConfig.with_args(num_attention_heads="n_heads", hidden_size="dim"), "donut-swin": NormalizedVisionConfig, diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index c8a33b0be3..1f05bbf9e7 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -67,6 +67,7 @@ "data2vec-audio": "hf-internal-testing/tiny-random-Data2VecAudioModel", "deberta": "hf-internal-testing/tiny-random-DebertaModel", "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model", + "decision-transformer": "edbeeching/decision-transformer-gym-hopper-medium", "deit": "hf-internal-testing/tiny-random-DeiTModel", "donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder", "donut-swin": "hf-internal-testing/tiny-random-DonutSwinModel", From ace12de787389c7aa630a9a47edef6d3cf67bc22 Mon Sep 17 00:00:00 2001 From: Raghu Ramarao Date: Fri, 11 Oct 2024 16:50:40 +0530 Subject: [PATCH 02/15] Decision Transformer to ONNX V0.2 --- optimum/exporters/onnx/base.py | 8 ++++---- optimum/exporters/onnx/model_configs.py | 17 +++++++++++------ optimum/utils/input_generators.py | 18 +++++++++++------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index ccf3a3f2bd..568535472f 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -175,10 +175,10 @@ class OnnxConfig(ExportConfig, ABC): ), "reinforcement-learning": OrderedDict( { - "state_preds": {0: "batch_size", 1: "sequence_length"}, - "action_preds": {0: "batch_size", 1: "sequence_length"}, - "return_preds": {0: "batch_size", 1: "sequence_length"}, - "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, + "state_preds": {0: "batch_size", 1: "sequence_length", 2: "states"}, + "action_preds": {0: "batch_size", 1: "sequence_length", 2: "actions"}, + "return_preds": {0: "batch_size", 1: "sequence_length", 2: "returns"}, + "last_hidden_state": {0: "batch_size", 1: "sequence_length", 2: "last_hidden_state"}, } ), "semantic-segmentation": OrderedDict({"logits": {0: "batch_size", 1: "num_labels", 2: "height", 3: "width"}}), diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index b2932079dd..d31fef36cf 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union from packaging import version +from sipbuild.generator.parser.tokens import states from transformers.utils import is_tf_available from ...onnx import merge_decoders @@ -264,16 +265,20 @@ class DecisionTransformerOnnxConfig(GPT2OnnxConfig): @property def inputs(self) -> Dict[str, Dict[int, str]]: - dynamic_axis = {0: "batch_size", 1: "sequence_length"} + DEFAULT_DUMMY_SHAPES['actions'] = self._normalized_config.config.act_dim + DEFAULT_DUMMY_SHAPES['states'] = self._normalized_config.config.state_dim + DEFAULT_DUMMY_SHAPES['returns'] = 1 + DEFAULT_DUMMY_SHAPES['last_hidden_state'] = self._normalized_config.config.hidden_size return { - 'actions': dynamic_axis, - 'timesteps': dynamic_axis, - 'attention_mask': dynamic_axis, - 'returns_to_go': dynamic_axis, - 'states': dynamic_axis, + 'states': {0: 'batch_size', 1: 'sequence_length', 2: 
'states'}, + 'actions': {0: 'batch_size', 1: 'sequence_length', 2: 'actions'}, + 'returns_to_go': {0: 'batch_size', 1: 'sequence_length', 2: 'returns'}, + 'timesteps': {0: 'batch_size', 1: 'sequence_length'}, + 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, } + class GPTNeoOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): DEFAULT_ONNX_OPSET = 14 NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_attention_heads="num_heads") diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index e6f2059a1f..808213eee1 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -522,20 +522,24 @@ class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator): def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + states = self.normalized_config.config.state_dim + actions = self.normalized_config.config.act_dim + max_ep_len = self.normalized_config.config.max_ep_len + if input_name == "states": - shape = [self.batch_size, self.normalized_config.config.state_dim] + shape = [self.batch_size, self.sequence_length, states] elif input_name == "actions": - shape = [self.batch_size, self.normalized_config.config.act_dim] + shape = [self.batch_size, self.sequence_length, actions] elif input_name == 'returns_to_go': - shape = [self.batch_size, 1] + shape = [self.batch_size, self.sequence_length, 1] elif input_name == 'timesteps': - shape = [self.normalized_config.config.state_dim, self.batch_size] - max_value = self.normalized_config.config.max_ep_len + shape = [self.batch_size, self.sequence_length] + max_value = max_ep_len return self.random_int_tensor(shape=shape, max_value = max_value, framework=framework, dtype=int_dtype) elif input_name == "attention_mask": - shape = [self.batch_size, self.normalized_config.config.state_dim] + shape = [self.batch_size, self.sequence_length] - return self.random_float_tensor(shape, min_value=-1., max_value=1., framework=framework, dtype=float_dtype) + return self.random_float_tensor(shape, min_value=-2., max_value=2., framework=framework, dtype=float_dtype) class DummySeq2SeqDecoderTextInputGenerator(DummyDecoderTextInputGenerator): From 46cc4aa9d7ff5e0bfaf8b2484c84c5b9b1aab8fb Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:33:52 +0200 Subject: [PATCH 03/15] Update optimum/exporters/onnx/model_configs.py --- optimum/exporters/onnx/model_configs.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index d31fef36cf..6e948e1ad1 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -265,10 +265,6 @@ class DecisionTransformerOnnxConfig(GPT2OnnxConfig): @property def inputs(self) -> Dict[str, Dict[int, str]]: - DEFAULT_DUMMY_SHAPES['actions'] = self._normalized_config.config.act_dim - DEFAULT_DUMMY_SHAPES['states'] = self._normalized_config.config.state_dim - DEFAULT_DUMMY_SHAPES['returns'] = 1 - DEFAULT_DUMMY_SHAPES['last_hidden_state'] = self._normalized_config.config.hidden_size return { 'states': {0: 'batch_size', 1: 'sequence_length', 2: 'states'}, From 8bbfd7b0e79dc933421a29985fc4566ba64b7868 Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:34:53 +0200 Subject: [PATCH 04/15] Apply suggestions from code review --- 
optimum/utils/input_generators.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 808213eee1..2f01c60f0e 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -520,24 +520,25 @@ class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator): 'states', ) - def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): - states = self.normalized_config.config.state_dim - actions = self.normalized_config.config.act_dim - max_ep_len = self.normalized_config.config.max_ep_len + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.act_dim = self.normalized_config.config.act_dim + self.state_dim = self.normalized_config.config.state_dim + self.max_ep_len = self.normalized_config.config.max_ep_len + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): if input_name == "states": - shape = [self.batch_size, self.sequence_length, states] + shape = [self.batch_size, self.sequence_length, self.state_dim] elif input_name == "actions": - shape = [self.batch_size, self.sequence_length, actions] + shape = [self.batch_size, self.sequence_length, self.act_dim] elif input_name == 'returns_to_go': shape = [self.batch_size, self.sequence_length, 1] - elif input_name == 'timesteps': - shape = [self.batch_size, self.sequence_length] - max_value = max_ep_len - return self.random_int_tensor(shape=shape, max_value = max_value, framework=framework, dtype=int_dtype) elif input_name == "attention_mask": shape = [self.batch_size, self.sequence_length] + elif input_name == 'timesteps': + shape = [self.batch_size, self.sequence_length] + return self.random_int_tensor(shape=shape, max_value=max_ep_len, framework=framework, dtype=int_dtype) return self.random_float_tensor(shape, min_value=-2., max_value=2., framework=framework, dtype=float_dtype) From 2eca16fb339fb3172bb0c90029cfe677088f65b9 Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:40:23 +0200 Subject: [PATCH 05/15] Update optimum/exporters/onnx/base.py --- optimum/exporters/onnx/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 568535472f..4192892f6e 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -175,9 +175,9 @@ class OnnxConfig(ExportConfig, ABC): ), "reinforcement-learning": OrderedDict( { - "state_preds": {0: "batch_size", 1: "sequence_length", 2: "states"}, - "action_preds": {0: "batch_size", 1: "sequence_length", 2: "actions"}, - "return_preds": {0: "batch_size", 1: "sequence_length", 2: "returns"}, + "return_preds": {0: "batch_size", 1: "sequence_length"}, + "action_preds": {0: "batch_size", 1: "sequence_length", 2: "act_dim"}, + "state_preds": {0: "batch_size", 1: "sequence_length", 2: "state_dim"}, "last_hidden_state": {0: "batch_size", 1: "sequence_length", 2: "last_hidden_state"}, } ), From f39b68770786d32a514ae84647546113a85a0137 Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:42:05 +0200 Subject: [PATCH 06/15] Update optimum/exporters/onnx/model_configs.py --- optimum/exporters/onnx/model_configs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 6e948e1ad1..f402141939 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -267,11 +267,11 @@ class DecisionTransformerOnnxConfig(GPT2OnnxConfig): def inputs(self) -> Dict[str, Dict[int, str]]: return { - 'states': {0: 'batch_size', 1: 'sequence_length', 2: 'states'}, - 'actions': {0: 'batch_size', 1: 'sequence_length', 2: 'actions'}, - 'returns_to_go': {0: 'batch_size', 1: 'sequence_length', 2: 'returns'}, 'timesteps': {0: 'batch_size', 1: 'sequence_length'}, + 'returns_to_go': {0: 'batch_size', 1: 'sequence_length'}, 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, + 'actions': {0: 'batch_size', 1: 'sequence_length', 2: 'act_dim'}, + 'states': {0: 'batch_size', 1: 'sequence_length', 2: 'state_dim'}, } From aa14a294fcbd4eb904439d6672d2cb20b3c8e6d1 Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:44:45 +0200 Subject: [PATCH 07/15] Update optimum/utils/input_generators.py --- optimum/utils/input_generators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 2f01c60f0e..d64fbb4f1c 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -520,7 +520,6 @@ class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator): 'states', ) - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.act_dim = self.normalized_config.config.act_dim From c8ffcd5857fbd8b4df122178c4ec15dcdcd11faa Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:27:42 +0200 Subject: [PATCH 08/15] Update optimum/exporters/onnx/model_configs.py --- optimum/exporters/onnx/model_configs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index f402141939..cb60daf5bc 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -18,7 +18,6 @@ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union from packaging import version -from sipbuild.generator.parser.tokens import states from transformers.utils import is_tf_available from ...onnx import merge_decoders From 3db58b646d93139f890afacda773576ee7502784 Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Fri, 11 Oct 2024 18:09:53 +0200 Subject: [PATCH 09/15] Apply suggestions from code review --- optimum/utils/input_generators.py | 2 +- optimum/utils/normalized_config.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index d64fbb4f1c..438d5444ff 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -537,7 +537,7 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int shape = [self.batch_size, self.sequence_length] elif input_name == 'timesteps': shape = [self.batch_size, self.sequence_length] - return self.random_int_tensor(shape=shape, max_value=max_ep_len, framework=framework, dtype=int_dtype) + return self.random_int_tensor(shape=shape, max_value=self.max_ep_len, framework=framework, dtype=int_dtype) return self.random_float_tensor(shape, min_value=-2., max_value=2., framework=framework, 
dtype=float_dtype) diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index 94425fa264..8cc1a1f3d9 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -87,8 +87,9 @@ class NormalizedTextConfig(NormalizedConfig): class NormalizedDecisionTransformerConfig(NormalizedConfig): # REFERENCE: https://huggingface.co/docs/transformers/model_doc/decision_transformer - STATE_DIM = "state_dim" ACT_DIM = "act_dim" + STATE_DIM = "state_dim" + MAX_EP_LEN = "max_ep_len" HIDDEN_SIZE = "hidden_size" From 26e61bbd1489a1aec413bdd2868dd9e09aca2521 Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Fri, 11 Oct 2024 18:11:38 +0200 Subject: [PATCH 10/15] Update optimum/exporters/tasks.py --- optimum/exporters/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 1b3236cf65..2786754c46 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -217,7 +217,7 @@ class TasksManager: "multiple-choice": "AutoModelForMultipleChoice", "object-detection": "AutoModelForObjectDetection", "question-answering": "AutoModelForQuestionAnswering", - "reinforcement-learning": "AutoModel", + "reinforcement-learning": ("AutoModel",), # multiple auto model families can be used for reinforcement-learning "semantic-segmentation": "AutoModelForSemanticSegmentation", "text-to-audio": ("AutoModelForTextToSpectrogram", "AutoModelForTextToWaveform"), "text-generation": "AutoModelForCausalLM", From 69802e9d82c4861ef7aa0cc07ce3ce7c83f5d790 Mon Sep 17 00:00:00 2001 From: Raghu Ramarao Date: Fri, 25 Oct 2024 20:20:23 +0530 Subject: [PATCH 11/15] ONNXToDT: changes to order of OrderedDict elements --- optimum/exporters/onnx/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 4192892f6e..44549f9add 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -175,9 +175,9 @@ class OnnxConfig(ExportConfig, ABC): ), "reinforcement-learning": OrderedDict( { - "return_preds": {0: "batch_size", 1: "sequence_length"}, - "action_preds": {0: "batch_size", 1: "sequence_length", 2: "act_dim"}, "state_preds": {0: "batch_size", 1: "sequence_length", 2: "state_dim"}, + "action_preds": {0: "batch_size", 1: "sequence_length", 2: "act_dim"}, + "return_preds": {0: "batch_size", 1: "sequence_length"}, "last_hidden_state": {0: "batch_size", 1: "sequence_length", 2: "last_hidden_state"}, } ), From 0696597e061ea571ca6bf877321012115717d090 Mon Sep 17 00:00:00 2001 From: Raghu Ramarao Date: Wed, 20 Nov 2024 17:54:02 +0530 Subject: [PATCH 12/15] make style changes --- optimum/exporters/onnx/model_configs.py | 17 +++++++---------- optimum/exporters/tasks.py | 4 +++- optimum/utils/input_generators.py | 16 ++++++++-------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 392c34ca8f..7c0dd3f821 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -27,8 +27,8 @@ BloomDummyPastKeyValuesGenerator, DummyAudioInputGenerator, DummyCodegenDecoderTextInputGenerator, - DummyDecoderTextInputGenerator, DummyDecisionTransformerInputGenerator, + DummyDecoderTextInputGenerator, DummyEncodecInputGenerator, DummyFluxTransformerTextInputGenerator, DummyFluxTransformerVisionInputGenerator, @@ -265,19 
+265,16 @@ class ImageGPTOnnxConfig(GPT2OnnxConfig): class DecisionTransformerOnnxConfig(GPT2OnnxConfig): - DUMMY_INPUT_GENERATOR_CLASSES = ( - DummyDecisionTransformerInputGenerator, - ) + DUMMY_INPUT_GENERATOR_CLASSES = (DummyDecisionTransformerInputGenerator,) @property def inputs(self) -> Dict[str, Dict[int, str]]: - return { - 'timesteps': {0: 'batch_size', 1: 'sequence_length'}, - 'returns_to_go': {0: 'batch_size', 1: 'sequence_length'}, - 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, - 'actions': {0: 'batch_size', 1: 'sequence_length', 2: 'act_dim'}, - 'states': {0: 'batch_size', 1: 'sequence_length', 2: 'state_dim'}, + "timesteps": {0: "batch_size", 1: "sequence_length"}, + "returns_to_go": {0: "batch_size", 1: "sequence_length"}, + "attention_mask": {0: "batch_size", 1: "sequence_length"}, + "actions": {0: "batch_size", 1: "sequence_length", 2: "act_dim"}, + "states": {0: "batch_size", 1: "sequence_length", 2: "state_dim"}, } diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 08c5badebe..a4856f936a 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -217,7 +217,9 @@ class TasksManager: "multiple-choice": "AutoModelForMultipleChoice", "object-detection": "AutoModelForObjectDetection", "question-answering": "AutoModelForQuestionAnswering", - "reinforcement-learning": ("AutoModel",), # multiple auto model families can be used for reinforcement-learning + "reinforcement-learning": ( + "AutoModel", + ), # multiple auto model families can be used for reinforcement-learning "semantic-segmentation": "AutoModelForSemanticSegmentation", "text-to-audio": ("AutoModelForTextToSpectrogram", "AutoModelForTextToWaveform"), "text-generation": "AutoModelForCausalLM", diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index b9fe8b22ed..a6ce07bab3 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -513,11 +513,11 @@ class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator): """ SUPPORTED_INPUT_NAMES = ( - 'actions', - 'timesteps', - 'attention_mask', - 'returns_to_go', - 'states', + "actions", + "timesteps", + "attention_mask", + "returns_to_go", + "states", ) def __init__(self, *args, **kwargs): @@ -531,15 +531,15 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int shape = [self.batch_size, self.sequence_length, self.state_dim] elif input_name == "actions": shape = [self.batch_size, self.sequence_length, self.act_dim] - elif input_name == 'returns_to_go': + elif input_name == "returns_to_go": shape = [self.batch_size, self.sequence_length, 1] elif input_name == "attention_mask": shape = [self.batch_size, self.sequence_length] - elif input_name == 'timesteps': + elif input_name == "timesteps": shape = [self.batch_size, self.sequence_length] return self.random_int_tensor(shape=shape, max_value=self.max_ep_len, framework=framework, dtype=int_dtype) - return self.random_float_tensor(shape, min_value=-2., max_value=2., framework=framework, dtype=float_dtype) + return self.random_float_tensor(shape, min_value=-2.0, max_value=2.0, framework=framework, dtype=float_dtype) class DummySeq2SeqDecoderTextInputGenerator(DummyDecoderTextInputGenerator): From 5b9c8cde6cd41e1ce608fce189cf31d45d674a69 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 20 Nov 2024 14:26:46 +0100 Subject: [PATCH 13/15] test --- optimum/exporters/onnx/base.py | 8 -------- optimum/exporters/onnx/model_configs.py | 14 +++++++++++++- 
optimum/exporters/tasks.py | 5 +---- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 0f99c85831..7e35691d54 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -173,14 +173,6 @@ class OnnxConfig(ExportConfig, ABC): "end_logits": {0: "batch_size", 1: "sequence_length"}, } ), - "reinforcement-learning": OrderedDict( - { - "state_preds": {0: "batch_size", 1: "sequence_length", 2: "state_dim"}, - "action_preds": {0: "batch_size", 1: "sequence_length", 2: "act_dim"}, - "return_preds": {0: "batch_size", 1: "sequence_length"}, - "last_hidden_state": {0: "batch_size", 1: "sequence_length", 2: "last_hidden_state"}, - } - ), "semantic-segmentation": OrderedDict({"logits": {0: "batch_size", 1: "num_labels", 2: "height", 3: "width"}}), "text2text-generation": OrderedDict({"logits": {0: "batch_size", 1: "decoder_sequence_length"}}), "text-classification": OrderedDict({"logits": {0: "batch_size"}}), diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 7c0dd3f821..8ffd524350 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -264,8 +264,11 @@ class ImageGPTOnnxConfig(GPT2OnnxConfig): pass -class DecisionTransformerOnnxConfig(GPT2OnnxConfig): +class DecisionTransformerOnnxConfig(OnnxConfig): DUMMY_INPUT_GENERATOR_CLASSES = (DummyDecisionTransformerInputGenerator,) + NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args( + act_dim="act_dim", state_dim="state_dim", max_ep_len="max_ep_len", hidden_size="hidden_size", allow_new=True + ) @property def inputs(self) -> Dict[str, Dict[int, str]]: @@ -277,6 +280,15 @@ def inputs(self) -> Dict[str, Dict[int, str]]: "states": {0: "batch_size", 1: "sequence_length", 2: "state_dim"}, } + @property + def outputs(self) -> Dict[str, Dict[int, str]]: + return { + "state_preds": {0: "batch_size", 1: "sequence_length", 2: "state_dim"}, + "action_preds": {0: "batch_size", 1: "sequence_length", 2: "act_dim"}, + "return_preds": {0: "batch_size", 1: "sequence_length"}, + "last_hidden_state": {0: "batch_size", 1: "sequence_length", 2: "last_hidden_state"}, + } + class GPTNeoOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): DEFAULT_ONNX_OPSET = 14 diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index a4856f936a..8f28ec42ce 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -217,9 +217,7 @@ class TasksManager: "multiple-choice": "AutoModelForMultipleChoice", "object-detection": "AutoModelForObjectDetection", "question-answering": "AutoModelForQuestionAnswering", - "reinforcement-learning": ( - "AutoModel", - ), # multiple auto model families can be used for reinforcement-learning + "reinforcement-learning": "AutoModel", "semantic-segmentation": "AutoModelForSemanticSegmentation", "text-to-audio": ("AutoModelForTextToSpectrogram", "AutoModelForTextToWaveform"), "text-generation": "AutoModelForCausalLM", @@ -579,7 +577,6 @@ class TasksManager: ), "decision-transformer": supported_tasks_mapping( "feature-extraction", - "feature-extraction-with-past", "reinforcement-learning", onnx="DecisionTransformerOnnxConfig", ), From 6ce8071c130b5fbfb2ba73bd55f25f930d4f63f5 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 20 Nov 2024 14:29:18 +0100 Subject: [PATCH 14/15] remove custom normalized config --- optimum/utils/normalized_config.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git 
a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py index ef292a3ebe..9ceed24c2d 100644 --- a/optimum/utils/normalized_config.py +++ b/optimum/utils/normalized_config.py @@ -85,15 +85,6 @@ class NormalizedTextConfig(NormalizedConfig): EOS_TOKEN_ID = "eos_token_id" -class NormalizedDecisionTransformerConfig(NormalizedConfig): - # REFERENCE: https://huggingface.co/docs/transformers/model_doc/decision_transformer - ACT_DIM = "act_dim" - STATE_DIM = "state_dim" - - MAX_EP_LEN = "max_ep_len" - HIDDEN_SIZE = "hidden_size" - - class NormalizedTextConfigWithGQA(NormalizedTextConfig): NUM_KEY_VALUE_HEADS = "num_key_value_heads" @@ -245,7 +236,6 @@ class NormalizedConfigManager: "cvt": NormalizedVisionConfig, "deberta": NormalizedTextConfig, "deberta-v2": NormalizedTextConfig, - "decision-transformer": NormalizedDecisionTransformerConfig, "deit": NormalizedVisionConfig, "distilbert": NormalizedTextConfig.with_args(num_attention_heads="n_heads", hidden_size="dim"), "donut-swin": NormalizedVisionConfig, From 269dfba3d84c93e79a0e648f5bbcfb23c174e2d1 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 21 Nov 2024 13:51:58 +0100 Subject: [PATCH 15/15] remove unncessary dynamic axes --- optimum/exporters/onnx/model_configs.py | 14 ++++++-------- optimum/utils/input_generators.py | 6 ++++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 8ffd524350..bca7cf24ac 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -266,27 +266,25 @@ class ImageGPTOnnxConfig(GPT2OnnxConfig): class DecisionTransformerOnnxConfig(OnnxConfig): DUMMY_INPUT_GENERATOR_CLASSES = (DummyDecisionTransformerInputGenerator,) - NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args( - act_dim="act_dim", state_dim="state_dim", max_ep_len="max_ep_len", hidden_size="hidden_size", allow_new=True - ) + NORMALIZED_CONFIG_CLASS = NormalizedConfig @property def inputs(self) -> Dict[str, Dict[int, str]]: return { + "states": {0: "batch_size", 1: "sequence_length"}, + "actions": {0: "batch_size", 1: "sequence_length"}, "timesteps": {0: "batch_size", 1: "sequence_length"}, "returns_to_go": {0: "batch_size", 1: "sequence_length"}, "attention_mask": {0: "batch_size", 1: "sequence_length"}, - "actions": {0: "batch_size", 1: "sequence_length", 2: "act_dim"}, - "states": {0: "batch_size", 1: "sequence_length", 2: "state_dim"}, } @property def outputs(self) -> Dict[str, Dict[int, str]]: return { - "state_preds": {0: "batch_size", 1: "sequence_length", 2: "state_dim"}, - "action_preds": {0: "batch_size", 1: "sequence_length", 2: "act_dim"}, + "state_preds": {0: "batch_size", 1: "sequence_length"}, + "action_preds": {0: "batch_size", 1: "sequence_length"}, "return_preds": {0: "batch_size", 1: "sequence_length"}, - "last_hidden_state": {0: "batch_size", 1: "sequence_length", 2: "last_hidden_state"}, + "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, } diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index a6ce07bab3..0ac1805f97 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -513,11 +513,11 @@ class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator): """ SUPPORTED_INPUT_NAMES = ( + "states", "actions", "timesteps", - "attention_mask", "returns_to_go", - "states", + "attention_mask", ) def __init__(self, *args, **kwargs): @@ -531,6 +531,8 @@ def generate(self, input_name: str, 
framework: str = "pt", int_dtype: str = "int shape = [self.batch_size, self.sequence_length, self.state_dim] elif input_name == "actions": shape = [self.batch_size, self.sequence_length, self.act_dim] + elif input_name == "rewards": + shape = [self.batch_size, self.sequence_length, 1] elif input_name == "returns_to_go": shape = [self.batch_size, self.sequence_length, 1] elif input_name == "attention_mask":
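A minimal end-to-end sketch of driving the exported graph, assuming the patches above are applied and the Hopper checkpoint used in the tests has been exported with the new `reinforcement-learning` task (e.g. `optimum-cli export onnx --model edbeeching/decision-transformer-gym-hopper-medium --task reinforcement-learning dt_onnx/`). The `dt_onnx/` output directory, the rollout length, and the dummy values below are illustrative assumptions; the input/output names, dtypes, and axes follow `DecisionTransformerOnnxConfig` and `DummyDecisionTransformerInputGenerator` as defined in the patches:

```python
# Sketch only: assumes the exported model lives at dt_onnx/model.onnx (hypothetical path).
import numpy as np
import onnxruntime as ort
from transformers import AutoConfig

model_id = "edbeeching/decision-transformer-gym-hopper-medium"
config = AutoConfig.from_pretrained(model_id)  # provides state_dim, act_dim, max_ep_len

# batch_size and sequence_length are the dynamic axes declared in the ONNX config.
batch_size, sequence_length = 1, 20
inputs = {
    # Shapes mirror DummyDecisionTransformerInputGenerator:
    # (batch, seq, state_dim / act_dim / 1) for the float inputs, (batch, seq) otherwise.
    "states": np.zeros((batch_size, sequence_length, config.state_dim), dtype=np.float32),
    "actions": np.zeros((batch_size, sequence_length, config.act_dim), dtype=np.float32),
    "returns_to_go": np.ones((batch_size, sequence_length, 1), dtype=np.float32),
    "timesteps": np.arange(sequence_length, dtype=np.int64).reshape(batch_size, sequence_length),
    "attention_mask": np.ones((batch_size, sequence_length), dtype=np.float32),
}

session = ort.InferenceSession("dt_onnx/model.onnx")
# Outputs follow the order declared in DecisionTransformerOnnxConfig.outputs.
state_preds, action_preds, return_preds, last_hidden_state = session.run(None, inputs)
print(action_preds.shape)  # (batch_size, sequence_length, act_dim)
```

Passing a full `(batch_size, sequence_length, ...)` window per call matches the final patch, where only `batch_size` and `sequence_length` are kept as dynamic axes and the per-feature dimensions (`state_dim`, `act_dim`, hidden size) are baked into the exported graph.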