From b1b5dba702ae76a735b1bb0b34f166f5182fa179 Mon Sep 17 00:00:00 2001
From: JingyaHuang
Date: Wed, 10 Jan 2024 16:11:56 +0000
Subject: [PATCH 1/2] improve dummy mask

---
 .../onnxruntime/usage_guides/optimization.mdx |  6 ++
 optimum/utils/input_generators.py             | 66 ++++++++++++++++++-
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/docs/source/onnxruntime/usage_guides/optimization.mdx b/docs/source/onnxruntime/usage_guides/optimization.mdx
index f3868b6a72d..5fff6fcf8b6 100644
--- a/docs/source/onnxruntime/usage_guides/optimization.mdx
+++ b/docs/source/onnxruntime/usage_guides/optimization.mdx
@@ -79,6 +79,12 @@ Here is a list of the possible optimizations you can enable:
 - Add Bias and Gelu / FastGelu fusion with `disable_bias_gelu_fusion=False`,
 - Gelu approximation with `enable_gelu_approximation=True`.
 
+<Tip warning={true}>
+
+Attention fusion is designed for right-side padding for BERT-like architectures (e.g. BERT, RoBERTa, ViT) and for left-side padding for generative models (GPT-like). If your inputs do not follow this convention, set `use_raw_attention_mask=True` to avoid potential accuracy issues, at the cost of some performance.
+
+</Tip>
+
 While [`~onnxruntime.configuration.OptimizationConfig`] gives you full control on how to do optimization, it can be hard to know what to enable / disable. Instead, you can use [`~onnxruntime.configuration.AutoOptimizationConfig`] which provides four common optimization levels:
 - O1: basic general optimizations.
 - O2: basic and extended general optimizations, transformers-specific fusions.
diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py
index 2fa98eed1b6..a01bf986612 100644
--- a/optimum/utils/input_generators.py
+++ b/optimum/utils/input_generators.py
@@ -180,6 +180,58 @@ def random_int_tensor(
         else:
             return np.random.randint(min_value, high=max_value, size=shape, dtype=DTYPE_MAPPER.np(dtype))
 
+    @staticmethod
+    @check_framework_is_available
+    def random_mask_tensor(shape: List[int], padding_side: str = "right", framework: str = "pt", dtype: str = "int64"):
+        """
+        Generates a mask tensor either right or left padded.
+
+        Args:
+            shape (`List[int]`):
+                The shape of the random tensor.
+            padding_side (`str`, defaults to "right"):
+                The side on which the padding is applied.
+            framework (`str`, defaults to `"pt"`):
+                The requested framework.
+            dtype (`str`, defaults to `"int64"`):
+                The dtype of the generated integer tensor. Could be "int64", "int32", "int8".
+
+        Returns:
+            A random mask tensor either left padded or right padded in the requested framework.
+ """ + mask_length = random.randint(1, shape[1] - 1) + if framework == "pt": + mask_tensor = torch.cat( + [ + torch.ones(shape[0], shape[1] - mask_length, dtype=DTYPE_MAPPER.pt(dtype)), + torch.zeros(shape[0], mask_length, dtype=DTYPE_MAPPER.pt(dtype)), + ], + dim=1, + ) + if padding_side == "left": + mask_tensor = torch.flip(mask_tensor, [1]) + elif framework == "tf": + mask_tensor = tf.concat( + [ + tf.ones((shape[0], shape[1] - mask_length), dtype=DTYPE_MAPPER.tf(dtype)), + tf.zeros((shape[0], mask_length), dtype=DTYPE_MAPPER.tf(dtype)), + ], + axis=1, + ) + if padding_side == "left": + mask_tensor = tf.reverse(mask_tensor, [1]) + else: + mask_tensor = np.concatenate( + [ + np.ones((shape[0], shape[1] - mask_length), dtype=DTYPE_MAPPER.np(dtype)), + np.zeros((shape[0], mask_length), dtype=DTYPE_MAPPER.np(dtype)), + ], + axis=1, + ) + if padding_side == "left": + mask_tensor = np.flip(mask_tensor, [1]) + return mask_tensor + @staticmethod @check_framework_is_available def random_float_tensor( @@ -364,13 +416,23 @@ def __init__( else: self.num_choices = num_choices - def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + def generate( + self, + input_name: str, + framework: str = "pt", + int_dtype: str = "int64", + float_dtype: str = "fp32", + padding_side: str = "right", + ): min_value = 0 max_value = 2 if input_name != "input_ids" else self.vocab_size shape = [self.batch_size, self.sequence_length] if self.task == "multiple-choice": shape = [self.batch_size, self.num_choices, self.sequence_length] - return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework, dtype=int_dtype) + if "mask" in input_name: + return self.random_mask_tensor(shape, padding_side=padding_side, framework=framework, dtype=int_dtype) + else: + return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework, dtype=int_dtype) class DummyDecoderTextInputGenerator(DummyTextInputGenerator): From 9d9a78121fad612ff8c44313daab2c561eccc3b4 Mon Sep 17 00:00:00 2001 From: JingyaHuang Date: Wed, 10 Jan 2024 23:48:19 +0000 Subject: [PATCH 2/2] apply suggestion and fix --- optimum/utils/input_generators.py | 32 ++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index a01bf986612..1a9024db7a1 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -199,37 +199,38 @@ def random_mask_tensor(shape: List[int], padding_side: str = "right", framework: Returns: A random mask tensor either left padded or right padded in the requested framework. 
""" - mask_length = random.randint(1, shape[1] - 1) + shape = tuple(shape) + mask_length = random.randint(1, shape[-1] - 1) if framework == "pt": mask_tensor = torch.cat( [ - torch.ones(shape[0], shape[1] - mask_length, dtype=DTYPE_MAPPER.pt(dtype)), - torch.zeros(shape[0], mask_length, dtype=DTYPE_MAPPER.pt(dtype)), + torch.ones(*shape[:-1], shape[-1] - mask_length, dtype=DTYPE_MAPPER.pt(dtype)), + torch.zeros(*shape[:-1], mask_length, dtype=DTYPE_MAPPER.pt(dtype)), ], - dim=1, + dim=-1, ) if padding_side == "left": - mask_tensor = torch.flip(mask_tensor, [1]) + mask_tensor = torch.flip(mask_tensor, [-1]) elif framework == "tf": mask_tensor = tf.concat( [ - tf.ones((shape[0], shape[1] - mask_length), dtype=DTYPE_MAPPER.tf(dtype)), - tf.zeros((shape[0], mask_length), dtype=DTYPE_MAPPER.tf(dtype)), + tf.ones((*shape[:-1], shape[-1] - mask_length), dtype=DTYPE_MAPPER.tf(dtype)), + tf.zeros((*shape[:-1], mask_length), dtype=DTYPE_MAPPER.tf(dtype)), ], - axis=1, + axis=-1, ) if padding_side == "left": - mask_tensor = tf.reverse(mask_tensor, [1]) + mask_tensor = tf.reverse(mask_tensor, [-1]) else: mask_tensor = np.concatenate( [ - np.ones((shape[0], shape[1] - mask_length), dtype=DTYPE_MAPPER.np(dtype)), - np.zeros((shape[0], mask_length), dtype=DTYPE_MAPPER.np(dtype)), + np.ones((*shape[:-1], shape[-1] - mask_length), dtype=DTYPE_MAPPER.np(dtype)), + np.zeros((*shape[:-1], mask_length), dtype=DTYPE_MAPPER.np(dtype)), ], - axis=1, + axis=-1, ) if padding_side == "left": - mask_tensor = np.flip(mask_tensor, [1]) + mask_tensor = np.flip(mask_tensor, [-1]) return mask_tensor @staticmethod @@ -396,6 +397,7 @@ def __init__( random_batch_size_range: Optional[Tuple[int, int]] = None, random_sequence_length_range: Optional[Tuple[int, int]] = None, random_num_choices_range: Optional[Tuple[int, int]] = None, + padding_side: str = "right", **kwargs, ): self.task = task @@ -415,6 +417,7 @@ def __init__( self.num_choices = random.randint(low, high) else: self.num_choices = num_choices + self.padding_side = padding_side def generate( self, @@ -422,7 +425,6 @@ def generate( framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32", - padding_side: str = "right", ): min_value = 0 max_value = 2 if input_name != "input_ids" else self.vocab_size @@ -430,7 +432,7 @@ def generate( if self.task == "multiple-choice": shape = [self.batch_size, self.num_choices, self.sequence_length] if "mask" in input_name: - return self.random_mask_tensor(shape, padding_side=padding_side, framework=framework, dtype=int_dtype) + return self.random_mask_tensor(shape, padding_side=self.padding_side, framework=framework, dtype=int_dtype) else: return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework, dtype=int_dtype)