From b1b5dba702ae76a735b1bb0b34f166f5182fa179 Mon Sep 17 00:00:00 2001
From: JingyaHuang
Date: Wed, 10 Jan 2024 16:11:56 +0000
Subject: [PATCH 1/2] improve dummy mask

---
 .../onnxruntime/usage_guides/optimization.mdx |  6 ++
 optimum/utils/input_generators.py             | 66 ++++++++++++++++++-
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/docs/source/onnxruntime/usage_guides/optimization.mdx b/docs/source/onnxruntime/usage_guides/optimization.mdx
index f3868b6a72d..5fff6fcf8b6 100644
--- a/docs/source/onnxruntime/usage_guides/optimization.mdx
+++ b/docs/source/onnxruntime/usage_guides/optimization.mdx
@@ -79,6 +79,12 @@ Here is a list of the possible optimizations you can enable:
 - Add Bias and Gelu / FastGelu fusion with `disable_bias_gelu_fusion=False`,
 - Gelu approximation with `enable_gelu_approximation=True`.
 
+<Tip warning={true}>
+
+Attention fusion is designed for right-side padding for BERT-like architectures (e.g. BERT, RoBERTa, ViT) and for left-side padding for generative models (GPT-like). If your inputs do not follow this convention, set `use_raw_attention_mask=True` to avoid potential accuracy issues, at the cost of some performance.
+
+</Tip>
+
 While [`~onnxruntime.configuration.OptimizationConfig`] gives you full control on how to do optimization, it can be hard to know what to enable / disable. Instead, you can use [`~onnxruntime.configuration.AutoOptimizationConfig`] which provides four common optimization levels:
 - O1: basic general optimizations.
 - O2: basic and extended general optimizations, transformers-specific fusions.
diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py
index 2fa98eed1b6..a01bf986612 100644
--- a/optimum/utils/input_generators.py
+++ b/optimum/utils/input_generators.py
@@ -180,6 +180,58 @@ def random_int_tensor(
         else:
             return np.random.randint(min_value, high=max_value, size=shape, dtype=DTYPE_MAPPER.np(dtype))
 
+    @staticmethod
+    @check_framework_is_available
+    def random_mask_tensor(shape: List[int], padding_side: str = "right", framework: str = "pt", dtype: str = "int64"):
+        """
+        Generates a mask tensor either right or left padded.
+
+        Args:
+            shape (`List[int]`):
+                The shape of the random tensor.
+            padding_side (`str`, defaults to "right"):
+                The side on which the padding is applied.
+            framework (`str`, defaults to `"pt"`):
+                The requested framework.
+            dtype (`str`, defaults to `"int64"`):
+                The dtype of the generated integer tensor. Could be "int64", "int32", "int8".
+
+        Returns:
+            A random mask tensor either left padded or right padded in the requested framework.
+ """ + mask_length = random.randint(1, shape[1] - 1) + if framework == "pt": + mask_tensor = torch.cat( + [ + torch.ones(shape[0], shape[1] - mask_length, dtype=DTYPE_MAPPER.pt(dtype)), + torch.zeros(shape[0], mask_length, dtype=DTYPE_MAPPER.pt(dtype)), + ], + dim=1, + ) + if padding_side == "left": + mask_tensor = torch.flip(mask_tensor, [1]) + elif framework == "tf": + mask_tensor = tf.concat( + [ + tf.ones((shape[0], shape[1] - mask_length), dtype=DTYPE_MAPPER.tf(dtype)), + tf.zeros((shape[0], mask_length), dtype=DTYPE_MAPPER.tf(dtype)), + ], + axis=1, + ) + if padding_side == "left": + mask_tensor = tf.reverse(mask_tensor, [1]) + else: + mask_tensor = np.concatenate( + [ + np.ones((shape[0], shape[1] - mask_length), dtype=DTYPE_MAPPER.np(dtype)), + np.zeros((shape[0], mask_length), dtype=DTYPE_MAPPER.np(dtype)), + ], + axis=1, + ) + if padding_side == "left": + mask_tensor = np.flip(mask_tensor, [1]) + return mask_tensor + @staticmethod @check_framework_is_available def random_float_tensor( @@ -364,13 +416,23 @@ def __init__( else: self.num_choices = num_choices - def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + def generate( + self, + input_name: str, + framework: str = "pt", + int_dtype: str = "int64", + float_dtype: str = "fp32", + padding_side: str = "right", + ): min_value = 0 max_value = 2 if input_name != "input_ids" else self.vocab_size shape = [self.batch_size, self.sequence_length] if self.task == "multiple-choice": shape = [self.batch_size, self.num_choices, self.sequence_length] - return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework, dtype=int_dtype) + if "mask" in input_name: + return self.random_mask_tensor(shape, padding_side=padding_side, framework=framework, dtype=int_dtype) + else: + return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework, dtype=int_dtype) class DummyDecoderTextInputGenerator(DummyTextInputGenerator): From 9d9a78121fad612ff8c44313daab2c561eccc3b4 Mon Sep 17 00:00:00 2001 From: JingyaHuang Date: Wed, 10 Jan 2024 23:48:19 +0000 Subject: [PATCH 2/2] apply suggestion and fix --- optimum/utils/input_generators.py | 32 ++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index a01bf986612..1a9024db7a1 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -199,37 +199,38 @@ def random_mask_tensor(shape: List[int], padding_side: str = "right", framework: Returns: A random mask tensor either left padded or right padded in the requested framework. 
""" - mask_length = random.randint(1, shape[1] - 1) + shape = tuple(shape) + mask_length = random.randint(1, shape[-1] - 1) if framework == "pt": mask_tensor = torch.cat( [ - torch.ones(shape[0], shape[1] - mask_length, dtype=DTYPE_MAPPER.pt(dtype)), - torch.zeros(shape[0], mask_length, dtype=DTYPE_MAPPER.pt(dtype)), + torch.ones(*shape[:-1], shape[-1] - mask_length, dtype=DTYPE_MAPPER.pt(dtype)), + torch.zeros(*shape[:-1], mask_length, dtype=DTYPE_MAPPER.pt(dtype)), ], - dim=1, + dim=-1, ) if padding_side == "left": - mask_tensor = torch.flip(mask_tensor, [1]) + mask_tensor = torch.flip(mask_tensor, [-1]) elif framework == "tf": mask_tensor = tf.concat( [ - tf.ones((shape[0], shape[1] - mask_length), dtype=DTYPE_MAPPER.tf(dtype)), - tf.zeros((shape[0], mask_length), dtype=DTYPE_MAPPER.tf(dtype)), + tf.ones((*shape[:-1], shape[-1] - mask_length), dtype=DTYPE_MAPPER.tf(dtype)), + tf.zeros((*shape[:-1], mask_length), dtype=DTYPE_MAPPER.tf(dtype)), ], - axis=1, + axis=-1, ) if padding_side == "left": - mask_tensor = tf.reverse(mask_tensor, [1]) + mask_tensor = tf.reverse(mask_tensor, [-1]) else: mask_tensor = np.concatenate( [ - np.ones((shape[0], shape[1] - mask_length), dtype=DTYPE_MAPPER.np(dtype)), - np.zeros((shape[0], mask_length), dtype=DTYPE_MAPPER.np(dtype)), + np.ones((*shape[:-1], shape[-1] - mask_length), dtype=DTYPE_MAPPER.np(dtype)), + np.zeros((*shape[:-1], mask_length), dtype=DTYPE_MAPPER.np(dtype)), ], - axis=1, + axis=-1, ) if padding_side == "left": - mask_tensor = np.flip(mask_tensor, [1]) + mask_tensor = np.flip(mask_tensor, [-1]) return mask_tensor @staticmethod @@ -396,6 +397,7 @@ def __init__( random_batch_size_range: Optional[Tuple[int, int]] = None, random_sequence_length_range: Optional[Tuple[int, int]] = None, random_num_choices_range: Optional[Tuple[int, int]] = None, + padding_side: str = "right", **kwargs, ): self.task = task @@ -415,6 +417,7 @@ def __init__( self.num_choices = random.randint(low, high) else: self.num_choices = num_choices + self.padding_side = padding_side def generate( self, @@ -422,7 +425,6 @@ def generate( framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32", - padding_side: str = "right", ): min_value = 0 max_value = 2 if input_name != "input_ids" else self.vocab_size @@ -430,7 +432,7 @@ def generate( if self.task == "multiple-choice": shape = [self.batch_size, self.num_choices, self.sequence_length] if "mask" in input_name: - return self.random_mask_tensor(shape, padding_side=padding_side, framework=framework, dtype=int_dtype) + return self.random_mask_tensor(shape, padding_side=self.padding_side, framework=framework, dtype=int_dtype) else: return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework, dtype=int_dtype)