Fix functional TF Whisper and modernize tests (huggingface#24301)
* Revert whisper change and modify the test_compile_tf_model test

* make fixup

* Tweak test slightly

* Add functional model saving to test

* Ensure TF can infer shapes for data2vec

* Add override for efficientformer

* Mark test as slow
Rocketknight1 authored Jun 16, 2023
1 parent ba3fb4b commit 62d71f4
Showing 13 changed files with 40 additions and 300 deletions.
@@ -1383,11 +1383,11 @@ def call(
         # only keep certain features, and reshape
         # note that we do +1 as the encoder_hidden_states also includes the initial embeddings
         features = [feature for idx, feature in enumerate(encoder_hidden_states) if idx + 1 in self.config.out_indices]
-        batch_size = shape_list(pixel_values)[0]
         patch_resolution = self.config.image_size // self.config.patch_size

         def reshape_features(x):
-            x = tf.reshape(x, (batch_size, patch_resolution, patch_resolution, -1))
+            # We do it this way so TF can always infer the non-batch dims at compile time
+            x = tf.reshape(x, (-1, patch_resolution, patch_resolution, self.config.hidden_size))
             return x

         features = [reshape_features(x[:, 1:, :]) for x in features]
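Why the new reshape helps (an illustrative sketch, not part of the commit): during functional construction the batch dimension is None, so the old shape_list(pixel_values)[0] yields a dynamic value, and together with -1 on the channel axis it leaves TF unable to infer the trailing dimension at compile time. Placing -1 on the batch axis and spelling out the hidden size keeps every non-batch dimension static. A minimal demonstration with made-up sizes:

import tensorflow as tf

hidden_size, patch_resolution = 32, 14
# Symbolic input whose batch dimension is None, as during functional construction
x = tf.keras.Input(shape=(patch_resolution * patch_resolution, hidden_size))

# Old pattern: dynamic batch size plus -1 on the channel axis -> trailing dim stays unknown
old = tf.reshape(x, (tf.shape(x)[0], patch_resolution, patch_resolution, -1))
print(old.shape)  # (None, 14, 14, None)

# New pattern: -1 on the batch axis with an explicit channel size -> all non-batch dims are static
new = tf.reshape(x, (-1, patch_resolution, patch_resolution, hidden_size))
print(new.shape)  # (None, 14, 14, 32)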
11 changes: 5 additions & 6 deletions src/transformers/models/whisper/modeling_tf_whisper.py
@@ -766,12 +766,11 @@ def _prepare_decoder_attention_mask(self, attention_mask, input_shape, past_key_
         # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
         batch_size, seq_len = input_shape[0], input_shape[1]

-        if seq_len > 1:
-            combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length=past_key_values_length)
-        else:
-            combined_attention_mask = _expand_mask(
-                tf.ones((batch_size, seq_len + past_key_values_length)), tgt_len=seq_len
-            )
+        combined_attention_mask = tf.cond(
+            tf.math.greater(seq_len, 1),
+            lambda: _make_causal_mask(input_shape, past_key_values_length=past_key_values_length),
+            lambda: _expand_mask(tf.ones((batch_size, seq_len + past_key_values_length)), tgt_len=seq_len),
+        )

         if attention_mask is not None:
             # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
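For context (an illustrative sketch, not code from this commit): a Python if on seq_len only works when the value is concrete, and during functional (symbolic) model construction seq_len is a placeholder tensor, so the plain conditional cannot be evaluated. tf.cond instead traces both the causal-mask and the expanded-mask branches into the graph and defers the choice to runtime, which is what lets the functional build succeed:

import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec(shape=(None, None), dtype=tf.int32)])
def pick_mask_branch(decoder_input_ids):
    seq_len = tf.shape(decoder_input_ids)[1]  # unknown until the function is called
    # Both branches are traced into the graph; which one runs is decided at runtime
    return tf.cond(
        tf.math.greater(seq_len, 1),
        lambda: tf.constant("causal mask"),
        lambda: tf.constant("expanded all-ones mask"),
    )

print(pick_mask_branch(tf.zeros((2, 5), tf.int32)))  # tf.Tensor(b'causal mask', ...)
print(pick_mask_branch(tf.zeros((2, 1), tf.int32)))  # tf.Tensor(b'expanded all-ones mask', ...)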
4 changes: 0 additions & 4 deletions tests/models/data2vec/test_modeling_tf_data2vec_vision.py
@@ -240,10 +240,6 @@ def test_for_image_segmentation(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_image_segmentation(*config_and_inputs)

-    @unittest.skip("Test was written for TF 1.x and isn't really relevant here")
-    def test_compile_tf_model(self):
-        pass
-
     def test_attention_outputs(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         config.return_dict = True
18 changes: 18 additions & 0 deletions tests/models/efficientformer/test_modeling_tf_efficientformer.py
@@ -344,6 +344,24 @@ def test_attention_outputs(self):
                 [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
             )

+    def test_compile_tf_model(self):
+        # We use a simplified version of this test for EfficientFormer because it requires training=False
+        # and Keras refuses to let us force that during functional construction
+        config, _ = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            # Prepare our model
+            model = model_class(config)
+            # These are maximally general inputs for the model, with multiple None dimensions
+            # Hopefully this will catch any conditionals that fail for flexible shapes
+            functional_inputs = {
+                key: tf.keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)
+                for key, val in model.input_signature.items()
+                if key in model.dummy_inputs
+            }
+            outputs_dict = model(functional_inputs)
+            self.assertTrue(outputs_dict is not None)
+

 # We will verify our results on an image of cute cats
 def prepare_img():
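The new override builds its symbolic inputs straight from the model's input_signature. Note that tf.keras.Input takes the shape without the batch axis (hence val.shape[1:]) and re-adds a leading None itself, so every dimension the signature leaves flexible stays flexible while tracing. A standalone sketch of the same pattern, with an assumed signature for illustration:

import tensorflow as tf

# Hypothetical entry in the style of a model's input_signature (name and shape assumed)
input_signature = {
    "pixel_values": tf.TensorSpec(shape=(None, 3, None, None), dtype=tf.float32),
}

# shape=spec.shape[1:] drops the batch axis; tf.keras.Input re-adds it as None
functional_inputs = {
    key: tf.keras.Input(shape=spec.shape[1:], dtype=spec.dtype, name=key)
    for key, spec in input_signature.items()
}
print(functional_inputs["pixel_values"].shape)  # (None, 3, None, None)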
4 changes: 0 additions & 4 deletions tests/models/funnel/test_modeling_tf_funnel.py
@@ -390,10 +390,6 @@ def test_for_question_answering(self):
     def test_saved_model_creation(self):
         pass

-    def test_compile_tf_model(self):
-        # This test fails the CI. TODO Lysandre re-enable it
-        pass
-

 @require_tf
 class TFFunnelBaseModelTest(TFModelTesterMixin, unittest.TestCase):
49 changes: 0 additions & 49 deletions tests/models/lxmert/test_modeling_tf_lxmert.py
@@ -532,55 +532,6 @@ def test_save_load(self):

         self.assert_outputs_same(after_outputs, outputs)

-    def test_compile_tf_model(self):
-        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
-        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
-
-        for model_class in self.all_model_classes:
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
-                return_obj_labels="PreTraining" in model_class.__name__
-            )
-
-            input_ids = tf.keras.Input(
-                batch_shape=(self.model_tester.batch_size, self.model_tester.seq_length),
-                name="input_ids",
-                dtype="int32",
-            )
-            visual_feats = tf.keras.Input(
-                batch_shape=(
-                    self.model_tester.batch_size,
-                    self.model_tester.num_visual_features,
-                    self.model_tester.visual_feat_dim,
-                ),
-                name="visual_feats",
-                dtype="int32",
-            )
-            visual_pos = tf.keras.Input(
-                batch_shape=(self.model_tester.batch_size, self.model_tester.num_visual_features, 4),
-                name="visual_pos",
-                dtype="int32",
-            )
-
-            # Prepare our model
-            model = model_class(config)
-
-            # Let's load it from the disk to be sure we can use pretrained weights
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                outputs = model(self._prepare_for_class(inputs_dict, model_class))  # build the model
-                model.save_pretrained(tmpdirname)
-                model = model_class.from_pretrained(tmpdirname)
-
-            outputs_dict = model(input_ids, visual_feats, visual_pos)
-            hidden_states = outputs_dict[0]
-
-            # Add a dense layer on top to test integration with other keras modules
-            outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)
-
-            # Compile extended model
-            extended_model = tf.keras.Model(inputs=[input_ids, visual_feats, visual_pos], outputs=[outputs])
-            extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
-
     @tooslow
     def test_saved_model_creation(self):
         pass
32 changes: 0 additions & 32 deletions tests/models/marian/test_modeling_tf_marian.py
@@ -16,7 +16,6 @@

 from __future__ import annotations

-import tempfile
 import unittest
 import warnings

@@ -209,37 +208,6 @@ def test_decoder_model_past_large_inputs(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
         self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs)

-    def test_compile_tf_model(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
-        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
-
-        model_class = self.all_generative_model_classes[0]
-        input_ids = {
-            "decoder_input_ids": tf.keras.Input(batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"),
-            "input_ids": tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32"),
-        }
-
-        # Prepare our model
-        model = model_class(config)
-        model(self._prepare_for_class(inputs_dict, model_class))  # Model must be called before saving.
-        # Let's load it from the disk to be sure we can use pre-trained weights
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            model.save_pretrained(tmpdirname)
-            model = model_class.from_pretrained(tmpdirname)
-
-        outputs_dict = model(input_ids)
-        hidden_states = outputs_dict[0]
-
-        # Add a dense layer on top to test integration with other keras modules
-        outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)
-
-        # Compile extended model
-        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
-        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
-
     @tooslow
     def test_saved_model_creation(self):
         pass
27 changes: 0 additions & 27 deletions tests/models/mbart/test_modeling_tf_mbart.py
@@ -15,7 +15,6 @@

 from __future__ import annotations

-import tempfile
 import unittest

 from transformers import AutoTokenizer, MBartConfig, is_tf_available

@@ -118,32 +117,6 @@ def check_decoder_model_past_large_inputs(self, config, inputs_dict):
         output, past_key_values = outputs.to_tuple()
         past_key_values = past_key_values[1]

-    def test_compile_tf_model(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
-        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
-        model_class = self.all_generative_model_classes[0]
-        input_ids = {
-            "decoder_input_ids": tf.keras.Input(batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"),
-            "input_ids": tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32"),
-        }
-        # Prepare our model
-        model = model_class(config)
-        model(self._prepare_for_class(inputs_dict, model_class))  # Model must be called before saving.
-        # Let's load it from the disk to be sure we can use pretrained weights
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            model.save_pretrained(tmpdirname)
-            model = model_class.from_pretrained(tmpdirname)
-        outputs_dict = model(input_ids)
-        hidden_states = outputs_dict[0]
-        # Add a dense layer on top to test integration with other keras modules
-        outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)
-        # Compile extended model
-        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
-        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
-

 def prepare_mbart_inputs_dict(
     config,
4 changes: 0 additions & 4 deletions tests/models/mobilevit/test_modeling_tf_mobilevit.py
@@ -199,10 +199,6 @@ def test_model_common_attributes(self):
     def test_attention_outputs(self):
         pass

-    @unittest.skip("Test was written for TF 1.x and isn't really relevant here")
-    def test_compile_tf_model(self):
-        pass
-
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
32 changes: 0 additions & 32 deletions tests/models/pegasus/test_modeling_tf_pegasus.py
@@ -15,7 +15,6 @@

 from __future__ import annotations

-import tempfile
 import unittest

 from transformers import AutoTokenizer, PegasusConfig, is_tf_available

@@ -207,37 +206,6 @@ def test_decoder_model_past_large_inputs(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
         self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs)

-    def test_compile_tf_model(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
-        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
-
-        model_class = self.all_generative_model_classes[0]
-        input_ids = {
-            "decoder_input_ids": tf.keras.Input(batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"),
-            "input_ids": tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32"),
-        }
-
-        # Prepare our model
-        model = model_class(config)
-        model(self._prepare_for_class(inputs_dict, model_class))  # Model must be called before saving.
-        # Let's load it from the disk to be sure we can use pretrained weights
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            model.save_pretrained(tmpdirname)
-            model = model_class.from_pretrained(tmpdirname)
-
-        outputs_dict = model(input_ids)
-        hidden_states = outputs_dict[0]
-
-        # Add a dense layer on top to test integration with other keras modules
-        outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)
-
-        # Compile extended model
-        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
-        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
-
     @tooslow
     def test_saved_model_creation(self):
         pass
4 changes: 0 additions & 4 deletions tests/models/segformer/test_modeling_tf_segformer.py
@@ -186,10 +186,6 @@ def test_inputs_embeds(self):
     def test_model_common_attributes(self):
         pass

-    @unittest.skip("Test was written for TF 1.x and isn't really relevant here")
-    def test_compile_tf_model(self):
-        pass
-
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
46 changes: 0 additions & 46 deletions tests/models/vit_mae/test_modeling_tf_vit_mae.py
@@ -283,52 +283,6 @@ def check_pt_tf_models(self, tf_model, pt_model, tf_inputs_dict):

         super().check_pt_tf_models(tf_model, pt_model, tf_inputs_dict)

-    # overwrite from common since TFViTMAEForPretraining outputs loss along with
-    # logits and mask indices. loss and mask indices are not suitable for integration
-    # with other keras modules.
-    def test_compile_tf_model(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
-        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
-
-        for model_class in self.all_model_classes:
-            # `pixel_values` implies that the input is an image
-            inputs = tf.keras.Input(
-                batch_shape=(
-                    3,
-                    self.model_tester.num_channels,
-                    self.model_tester.image_size,
-                    self.model_tester.image_size,
-                ),
-                name="pixel_values",
-                dtype="float32",
-            )
-
-            # Prepare our model
-            model = model_class(config)
-            model(self._prepare_for_class(inputs_dict, model_class))  # Model must be called before saving.
-            # Let's load it from the disk to be sure we can use pretrained weights
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname, saved_model=False)
-                model = model_class.from_pretrained(tmpdirname)
-
-            outputs_dict = model(inputs)
-            hidden_states = outputs_dict[0]
-
-            # `TFViTMAEForPreTraining` outputs are not recommended to be used for
-            # downstream application. This is just to check if the outputs of
-            # `TFViTMAEForPreTraining` can be integrated with other keras modules.
-            if model_class.__name__ == "TFViTMAEForPreTraining":
-                hidden_states = outputs_dict["logits"]
-
-            # Add a dense layer on top to test integration with other keras modules
-            outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)
-
-            # Compile extended model
-            extended_model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
-            extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
-
     # overwrite from common since TFViTMAEForPretraining has random masking, we need to fix the noise
     # to generate masks during test
     def test_keras_save_load(self):
1 remaining changed file not shown.