Unverified commit 2199382d, authored by Yih-Dar and committed by GitHub

Use random_attention_mask for TF tests (#16517)



* use random_attention_mask for TF tests

* Fix for TFCLIP test (for now).
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 823dbf8a
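For context, here is a minimal, self-contained sketch (not part of the commit) of the two test helpers involved, with simplified bodies and illustrative 2-D shapes; the real helpers live in the shared test_modeling_tf_common module. A plain `ids_tensor(..., vocab_size=2)` mask can come out all zeros for a given example, whereas `random_attention_mask` pins one position to 1 so every example attends to at least one token, which is why the model testers below switch to it.

```python
# Self-contained sketch of the helpers touched by this commit (simplified bodies,
# illustrative [batch_size, seq_length] shapes; not the repo's exact code).
import random

import tensorflow as tf


def ids_tensor(shape, vocab_size, rng=None, dtype=tf.int32):
    # Random integer tensor with values in [0, vocab_size).
    rng = rng or random.Random()
    values = [rng.randint(0, vocab_size - 1) for _ in range(shape[0] * shape[1])]
    return tf.constant(values, shape=shape, dtype=dtype)


def random_attention_mask(shape, rng=None, dtype=tf.int32):
    # Random 0/1 mask that is guaranteed to attend to at least one token per row.
    mask = ids_tensor(shape, vocab_size=2, rng=rng, dtype=dtype)
    # Pin the last position to 1 so no row is all zeros.
    return tf.concat([mask[:, :-1], tf.ones_like(mask[:, -1:], dtype=dtype)], axis=-1)


# ids_tensor([2, 7], vocab_size=2) may produce an all-zero row (no attended token);
# random_attention_mask([2, 7]) never does.
print(random_attention_mask([2, 7]).numpy())
```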
@@ -20,7 +20,7 @@ from transformers import OpenAIGPTConfig, is_tf_available
 from transformers.testing_utils import require_tf, slow
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 if is_tf_available():
@@ -70,7 +70,7 @@ class TFOpenAIGPTModelTester:
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
         token_type_ids = None
         if self.use_token_type_ids:
@@ -20,7 +20,7 @@ from transformers import RemBertConfig, is_tf_available
 from transformers.testing_utils import require_tf, slow
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
 if is_tf_available():
@@ -95,7 +95,7 @@ class TFRemBertModelTester:
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
         token_type_ids = None
         if self.use_token_type_ids:
@@ -20,7 +20,7 @@ from transformers import RobertaConfig, is_tf_available
 from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
 if is_tf_available():
@@ -72,7 +72,7 @@ class TFRobertaModelTester:
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
         token_type_ids = None
         if self.use_token_type_ids:
@@ -20,7 +20,7 @@ from transformers import RoFormerConfig, is_tf_available
 from transformers.testing_utils import require_tf, slow
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 if is_tf_available():
@@ -95,7 +95,7 @@ class TFRoFormerModelTester:
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
         token_type_ids = None
         if self.use_token_type_ids:
@@ -20,7 +20,7 @@ from transformers.testing_utils import require_sentencepiece, require_tf, requir
 from transformers.utils import cached_property
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 if is_tf_available():
@@ -58,7 +58,7 @@ class TFT5ModelTester:
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
         token_labels = None
         if self.use_labels:
@@ -38,7 +38,7 @@ from transformers.testing_utils import require_tensorflow_probability, require_t
 from transformers.utils import cached_property
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 if is_tf_available():
@@ -158,7 +158,7 @@ class TFTapasModelTester:
         input_mask = None
         if self.use_input_mask:
-            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
+            input_mask = random_attention_mask([self.batch_size, self.seq_length])
         token_type_ids = []
         for type_vocab_size in self.type_vocab_sizes:
@@ -1440,7 +1440,7 @@ def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):
 def random_attention_mask(shape, rng=None, name=None, dtype=None):
     attn_mask = ids_tensor(shape, vocab_size=2, rng=None, name=None, dtype=dtype)
     # make sure that at least one token is attended to for each batch
-    attn_mask = tf.concat([tf.constant(value=1, shape=(shape[0], 1), dtype=dtype), attn_mask[:, 1:]], axis=1)
+    attn_mask = tf.concat([attn_mask[:, :-1], tf.ones_like(attn_mask[:, -1:], dtype=dtype)], axis=-1)
     return attn_mask
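Both the old and the new line guarantee a non-empty mask; the difference is which position gets pinned. The old code forced the first column to 1, while the new code keeps the random prefix and forces the last column to 1, which the commit message notes as a temporary accommodation for the TFCLIP test. A small, hypothetical before/after illustration, assuming `attn_mask` is an integer tensor of shape [batch_size, seq_length]:

```python
# Illustrative comparison of the old and new pinning (values are made up).
import tensorflow as tf

attn_mask = tf.constant([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=tf.int32)

# Old behaviour: pin the FIRST position of every row to 1.
old = tf.concat(
    [tf.constant(value=1, shape=(attn_mask.shape[0], 1), dtype=tf.int32), attn_mask[:, 1:]],
    axis=1,
)

# New behaviour: keep the random prefix and pin the LAST position of every row to 1.
new = tf.concat([attn_mask[:, :-1], tf.ones_like(attn_mask[:, -1:], dtype=tf.int32)], axis=-1)

print(old.numpy())  # [[1 1 0 0] [1 0 0 0]]
print(new.numpy())  # [[0 1 0 1] [0 0 0 1]]
```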
@@ -20,7 +20,7 @@ from transformers import is_tf_available
 from transformers.testing_utils import require_tf, slow
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 if is_tf_available():
@@ -75,7 +75,7 @@ class TFXLMModelTester:
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
-        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)
+        input_mask = random_attention_mask([self.batch_size, self.seq_length], dtype=tf.float32)
         input_lengths = None
         if self.use_input_lengths:
@@ -22,7 +22,7 @@ from transformers import XLNetConfig, is_tf_available
 from transformers.testing_utils import require_tf, slow
 from ..test_configuration_common import ConfigTester
-from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from ..test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
 if is_tf_available():
@@ -75,7 +75,7 @@ class TFXLNetModelTester:
         input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
         input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
         segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
-        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)
+        input_mask = random_attention_mask([self.batch_size, self.seq_length], dtype=tf.float32)
         input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
         perm_mask = tf.zeros((self.batch_size, self.seq_length + 1, self.seq_length), dtype=tf.float32)
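One detail from the XLM and XLNet hunks above: those testers build a float mask, so they pass dtype=tf.float32 to random_attention_mask, which forwards it to ids_tensor. A rough, self-contained sketch of the resulting behaviour, with tf.random.uniform standing in for the repo's ids_tensor helper and illustrative sizes:

```python
import tensorflow as tf

batch_size, seq_length = 2, 7

# Stand-in for ids_tensor([batch_size, seq_length], vocab_size=2, dtype=tf.float32).
random_bits = tf.cast(
    tf.random.uniform([batch_size, seq_length], maxval=2, dtype=tf.int32), tf.float32
)
# Same construction as the new random_attention_mask body, with dtype=tf.float32:
# pin the last column to 1.0 so every row attends to at least one token.
input_mask = tf.concat(
    [random_bits[:, :-1], tf.ones_like(random_bits[:, -1:], dtype=tf.float32)], axis=-1
)
assert input_mask.dtype == tf.float32
print(input_mask.numpy())
```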