chenpangpang / transformers · Commits

Commit a75c64d8
Authored Aug 26, 2020 by Lysandre
Parent: e78c1103

Black 20 release
Changes: 191 files in the full commit. This page shows 20 changed files with 228 additions and 67 deletions (+228 −67).
tests/test_modeling_longformer.py      +2 −1
tests/test_modeling_mobilebert.py      +9 −2
tests/test_modeling_openai.py          +2 −1
tests/test_modeling_reformer.py        +70 −22
tests/test_modeling_roberta.py         +4 −3
tests/test_modeling_t5.py              +73 −12
tests/test_modeling_tf_camembert.py    +4 −2
tests/test_modeling_tf_common.py       +24 −7
tests/test_modeling_tf_ctrl.py         +2 −1
tests/test_modeling_tf_distilbert.py   +2 −1
tests/test_modeling_tf_electra.py      +2 −1
tests/test_modeling_tf_flaubert.py     +4 −2
tests/test_modeling_tf_gpt2.py         +2 −1
tests/test_modeling_tf_longformer.py   +12 −4
tests/test_modeling_tf_openai.py       +2 −1
tests/test_modeling_tf_roberta.py      +2 −1
tests/test_modeling_tf_t5.py           +6 −2
tests/test_modeling_tf_transfo_xl.py   +2 −1
tests/test_modeling_tf_xlm.py          +2 −1
tests/test_modeling_tf_xlnet.py        +2 −1
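Nearly every hunk below is the same mechanical change. The commit re-formats the test suite with black 20.8, whose new "magic trailing comma" rule treats a trailing comma inside any bracket pair as a request to explode that pair, one element per line; black 19.x would leave such calls on a single line. A minimal sketch of the rule, using a hypothetical stand-in function rather than code from this commit:

    def model(*args, **kwargs):  # hypothetical stand-in for a transformers model call
        return args, kwargs

    # black 19.x left this on one line, trailing comma and all:
    result = model(1, attention_mask=2, token_type_ids=3,)

    # black 20.8 treats the trailing comma as a request to explode the call:
    result = model(
        1,
        attention_mask=2,
        token_type_ids=3,
    )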
tests/test_modeling_longformer.py

@@ -40,7 +40,8 @@ if is_torch_available():
 class LongformerModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_mobilebert.py

@@ -217,7 +217,10 @@ class MobileBertModelTester:
         model.to(torch_device)
         model.eval()
         result = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, next_sentence_label=sequence_labels,
+            input_ids,
+            attention_mask=input_mask,
+            token_type_ids=token_type_ids,
+            next_sentence_label=sequence_labels,
         )
         self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))

@@ -397,7 +400,11 @@ class MobileBertModelTest(ModelTesterMixin, unittest.TestCase):
 def _long_tensor(tok_lst):
-    return torch.tensor(tok_lst, dtype=torch.long, device=torch_device,)
+    return torch.tensor(
+        tok_lst,
+        dtype=torch.long,
+        device=torch_device,
+    )

 TOLERANCE = 1e-3
tests/test_modeling_openai.py

@@ -37,7 +37,8 @@ if is_torch_available():
 class OpenAIGPTModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_reformer.py

@@ -240,14 +240,19 @@ class ReformerModelTester:
         half_input_ids = input_ids[:, :half_seq_len]

         # normal padded
-        attn_mask = torch.cat([torch.ones_like(half_input_ids), torch.zeros_like(half_input_ids)], dim=-1,)
+        attn_mask = torch.cat(
+            [torch.ones_like(half_input_ids), torch.zeros_like(half_input_ids)],
+            dim=-1,
+        )
         input_ids_padded = torch.cat(
-            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)], dim=-1,
+            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)],
+            dim=-1,
         )

         # shifted padded
         input_ids_roll = torch.cat(
-            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)], dim=-1,
+            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)],
+            dim=-1,
         )
         input_ids_roll = torch.roll(input_ids_roll, roll, dims=-1)
         attn_mask_roll = torch.roll(attn_mask, roll, dims=-1)

@@ -283,13 +288,21 @@ class ReformerModelTester:
         torch.manual_seed(layer.attention_seed)
         attn_outputs = layer.attention(hidden_states, attention_mask=input_mask)
         self.parent.assertTrue(
-            torch.allclose(prev_attn_output + attn_outputs.hidden_states, next_attn_output, atol=1e-3,)
+            torch.allclose(
+                prev_attn_output + attn_outputs.hidden_states,
+                next_attn_output,
+                atol=1e-3,
+            )
         )
         torch.manual_seed(layer.feed_forward_seed)
         feed_forward_hidden_states = layer.feed_forward(next_attn_output)
         self.parent.assertTrue(
-            torch.allclose(next_hidden_states, hidden_states + feed_forward_hidden_states, atol=1e-3,)
+            torch.allclose(
+                next_hidden_states,
+                hidden_states + feed_forward_hidden_states,
+                atol=1e-3,
+            )
         )

     def create_and_check_reformer_feed_backward_chunking(self, config, input_ids, input_mask, choice_labels):

@@ -416,7 +429,10 @@ class ReformerModelTester:
         model.to(torch_device)
         model.eval()
         result = model(
-            input_ids, attention_mask=input_mask, start_positions=choice_labels, end_positions=choice_labels,
+            input_ids,
+            attention_mask=input_mask,
+            start_positions=choice_labels,
+            end_positions=choice_labels,
         )
         self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
         self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

@@ -887,7 +903,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         reformer_output = layer(prev_attn_output=hidden_states.clone(), hidden_states=hidden_states)
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.6879, -1.3083, -0.4708, 1.3555, -0.6292], dtype=torch.float, device=torch_device,
+            [1.6879, -1.3083, -0.4708, 1.3555, -0.6292],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -902,11 +920,15 @@ class ReformerIntegrationTests(unittest.TestCase):
         layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
         layer.eval()
         reformer_output = layer(
-            prev_attn_output=hidden_states.clone(), hidden_states=hidden_states, attention_mask=attn_mask,
+            prev_attn_output=hidden_states.clone(),
+            hidden_states=hidden_states,
+            attention_mask=attn_mask,
         )
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.6439, -1.2306, -0.5108, 1.3006, -0.6537], dtype=torch.float, device=torch_device,
+            [1.6439, -1.2306, -0.5108, 1.3006, -0.6537],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -922,7 +944,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         reformer_output = layer(prev_attn_output=hidden_states, hidden_states=hidden_states)
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.4212, -2.0576, -0.9688, 1.4599, -0.1344], dtype=torch.float, device=torch_device,
+            [1.4212, -2.0576, -0.9688, 1.4599, -0.1344],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -935,10 +959,16 @@ class ReformerIntegrationTests(unittest.TestCase):
         torch.manual_seed(0)
         layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
         layer.eval()
-        reformer_output = layer(prev_attn_output=hidden_states, hidden_states=hidden_states, attention_mask=attn_mask,)
+        reformer_output = layer(
+            prev_attn_output=hidden_states,
+            hidden_states=hidden_states,
+            attention_mask=attn_mask,
+        )
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.4750, -2.0235, -0.9743, 1.4463, -0.1269], dtype=torch.float, device=torch_device,
+            [1.4750, -2.0235, -0.9743, 1.4463, -0.1269],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -953,7 +983,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
         output_slice = hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [-0.9896, -0.9396, -1.0831, -0.0597, 0.2456], dtype=torch.float, device=torch_device,
+            [-0.9896, -0.9396, -1.0831, -0.0597, 0.2456],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -967,7 +999,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
         output_slice = hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [-1.6791, 0.7171, 0.1594, 0.4063, 1.2584], dtype=torch.float, device=torch_device,
+            [-1.6791, 0.7171, 0.1594, 0.4063, 1.2584],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -983,7 +1017,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
         output_slice = hidden_states[1, -1, :5]
         expected_output_slice = torch.tensor(
-            [0.0256, -0.0121, 0.0636, 0.0024, -0.0393], dtype=torch.float, device=torch_device,
+            [0.0256, -0.0121, 0.0636, 0.0024, -0.0393],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -1005,15 +1041,21 @@ class ReformerIntegrationTests(unittest.TestCase):
         # check last grads to cover all proable errors
         grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
         expected_grad_slice_word = torch.tensor(
-            [-0.0005, 0.0001, 0.0002, 0.0003, 0.0006], dtype=torch.float, device=torch_device,
+            [-0.0005, 0.0001, 0.0002, 0.0003, 0.0006],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
         expected_grad_slice_pos_fac_1 = torch.tensor(
-            [0.0037, -1.3793, -1.0231, -1.5230, -2.5306], dtype=torch.float, device=torch_device,
+            [0.0037, -1.3793, -1.0231, -1.5230, -2.5306],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
         expected_grad_slice_pos_fac_2 = torch.tensor(
-            [-1.3165, 0.5168, 0.7785, 1.0811, -0.9830], dtype=torch.float, device=torch_device,
+            [-1.3165, 0.5168, 0.7785, 1.0811, -0.9830],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(grad_slice_word, expected_grad_slice_word, atol=1e-3))
         self.assertTrue(torch.allclose(grad_slice_position_factor_1, expected_grad_slice_pos_fac_1, atol=1e-3))

@@ -1038,15 +1080,21 @@ class ReformerIntegrationTests(unittest.TestCase):
         # check last grads to cover all proable errors
         grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
         expected_grad_slice_word = torch.tensor(
-            [2.6357e-05, 4.3358e-04, -8.4985e-04, 1.0094e-04, 3.8954e-04], dtype=torch.float, device=torch_device,
+            [2.6357e-05, 4.3358e-04, -8.4985e-04, 1.0094e-04, 3.8954e-04],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
         expected_grad_slice_pos_fac_1 = torch.tensor(
-            [-0.0984, 0.6283, 0.4282, 1.2960, 0.6897], dtype=torch.float, device=torch_device,
+            [-0.0984, 0.6283, 0.4282, 1.2960, 0.6897],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
         expected_grad_slice_pos_fac_2 = torch.tensor(
-            [0.4626, -0.0231, -0.0172, 0.1081, 0.3805], dtype=torch.float, device=torch_device,
+            [0.4626, -0.0231, -0.0172, 0.1081, 0.3805],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(grad_slice_word, expected_grad_slice_word, atol=1e-3))
         self.assertTrue(torch.allclose(grad_slice_position_factor_1, expected_grad_slice_pos_fac_1, atol=1e-3))
tests/test_modeling_roberta.py

@@ -45,7 +45,8 @@ if is_torch_available():
 class RobertaModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13

@@ -352,7 +353,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertIsNotNone(model)

     def test_create_position_ids_respects_padding_index(self):
-        """ Ensure that the default position ids only assign a sequential . This is a regression
+        """Ensure that the default position ids only assign a sequential . This is a regression
         test for https://github.com/huggingface/transformers/issues/1761

         The position ids should be masked with the embedding object's padding index. Therefore, the

@@ -371,7 +372,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))

     def test_create_position_ids_from_inputs_embeds(self):
-        """ Ensure that the default position ids only assign a sequential . This is a regression
+        """Ensure that the default position ids only assign a sequential . This is a regression
         test for https://github.com/huggingface/transformers/issues/1761

         The position ids should be masked with the embedding object's padding index. Therefore, the
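The two docstring hunks above reflect another black 20.8 change: docstrings are now reformatted as well, and the space black 19.x tolerated after the opening triple quotes is stripped. A tiny sketch with a hypothetical function:

    def check_old():
        """ Ensure something holds."""  # black 19.x accepted the leading space

    def check_new():
        """Ensure something holds."""  # black 20.8 removes it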
tests/test_modeling_t5.py

@@ -101,7 +101,13 @@ class T5ModelTester:
         )

     def check_prepare_lm_labels_via_shift_left(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config)
         model.to(torch_device)

@@ -134,7 +140,13 @@ class T5ModelTester:
         self.parent.assertListEqual(decoder_input_ids_slice[1:].tolist(), lm_labels_slice[:-1].tolist())

     def create_and_check_model(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config)
         model.to(torch_device)

@@ -160,7 +172,13 @@ class T5ModelTester:
         self.parent.assertEqual(len(decoder_past[1][0]), 4)

     def create_and_check_with_lm_head(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5ForConditionalGeneration(config=config).to(torch_device).eval()
         outputs = model(

@@ -174,7 +192,13 @@ class T5ModelTester:
         self.parent.assertEqual(outputs["loss"].size(), ())

     def create_and_check_decoder_model_past(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config).get_decoder().to(torch_device).eval()
         # first forward pass

@@ -205,7 +229,13 @@ class T5ModelTester:
         self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

     def create_and_check_decoder_model_attention_mask_past(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config).get_decoder()
         model.to(torch_device)

@@ -231,7 +261,8 @@ class T5ModelTester:
         # append to next input_ids and attn_mask
         next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
         attn_mask = torch.cat(
-            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1,
+            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)],
+            dim=1,
         )

         # get two different outputs

@@ -249,7 +280,13 @@ class T5ModelTester:
         self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

     def create_and_check_generate_with_past_key_value_states(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5ForConditionalGeneration(config=config).to(torch_device).eval()
         torch.manual_seed(0)

@@ -261,14 +298,26 @@ class T5ModelTester:
         self.parent.assertTrue(torch.all(output_with_past_cache == output_without_past_cache))

     def create_and_check_model_fp16_forward(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config).to(torch_device).half().eval()
         output = model(input_ids, decoder_input_ids=input_ids, attention_mask=attention_mask)["last_hidden_state"]
         self.parent.assertFalse(torch.isnan(output).any().item())

     def create_and_check_encoder_decoder_shared_weights(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         for model_class in [T5Model, T5ForConditionalGeneration]:
             torch.manual_seed(0)

@@ -339,7 +388,14 @@ class T5ModelTester:
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
-        (config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,) = config_and_inputs
+        (
+            config,
+            input_ids,
+            decoder_input_ids,
+            attention_mask,
+            decoder_attention_mask,
+            lm_labels,
+        ) = config_and_inputs

         inputs_dict = {
             "input_ids": input_ids,

@@ -412,7 +468,11 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
         model = T5Model(config_and_inputs[0]).to(torch_device)
         with tempfile.TemporaryDirectory() as tmpdirname:
             torch.onnx.export(
-                model, config_and_inputs[1], f"{tmpdirname}/t5_test.onnx", export_params=True, opset_version=9,
+                model,
+                config_and_inputs[1],
+                f"{tmpdirname}/t5_test.onnx",
+                export_params=True,
+                opset_version=9,
             )

@@ -469,7 +529,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
         )
         decoded = tok.batch_decode(hypotheses_batch, skip_special_tokens=True, clean_up_tokenization_spaces=False)
         self.assertListEqual(
-            expected_summaries, decoded,
+            expected_summaries,
+            decoded,
         )

     @slow
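The prepare_config_and_inputs_for_common hunk above shows the trailing-comma rule is not limited to call sites: a parenthesized assignment target with a trailing comma is exploded the same way. A sketch with stand-in values (prepare is hypothetical, standing in for self.prepare_config_and_inputs()):

    def prepare():  # hypothetical stand-in
        return "config", "input_ids", "lm_labels"

    # black 19.x output:
    (config, input_ids, lm_labels,) = prepare()

    # black 20.8 output:
    (
        config,
        input_ids,
        lm_labels,
    ) = prepare()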
tests/test_modeling_tf_camembert.py

@@ -33,7 +33,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
         model = TFCamembertModel.from_pretrained("jplu/tf-camembert-base")
         input_ids = tf.convert_to_tensor(
-            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]], dtype=tf.int32,
+            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]],
+            dtype=tf.int32,
         )  # J'aime le camembert !"
         output = model(input_ids)["last_hidden_state"]

@@ -41,7 +42,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
         self.assertEqual(output.shape, expected_shape)
         # compare the actual values for a slice.
         expected_slice = tf.convert_to_tensor(
-            [[[-0.0254, 0.0235, 0.1027], [0.0606, -0.1811, -0.0418], [-0.1561, -0.1127, 0.2687]]], dtype=tf.float32,
+            [[[-0.0254, 0.0235, 0.1027], [0.0606, -0.1811, -0.0418], [-0.1561, -0.1127, 0.2687]]],
+            dtype=tf.float32,
         )
         # camembert = torch.hub.load('pytorch/fairseq', 'camembert.v0')
         # camembert.eval()
tests/test_modeling_tf_common.py

@@ -155,7 +155,8 @@ class TFModelTesterMixin:
             self.assertEqual(len(outputs), num_out)
             self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
             self.assertListEqual(
-                list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size],
+                list(hidden_states[0].shape[-2:]),
+                [self.model_tester.seq_length, self.model_tester.hidden_size],
             )

     @slow

@@ -486,7 +487,8 @@ class TFModelTesterMixin:
             hidden_states = [t.numpy() for t in outputs[-1]]
             self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
             self.assertListEqual(
-                list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size],
+                list(hidden_states[0].shape[-2:]),
+                [self.model_tester.seq_length, self.model_tester.hidden_size],
             )

         for model_class in self.all_model_classes:

@@ -591,9 +593,15 @@ class TFModelTesterMixin:
                 x = wte([input_ids, None, None, None], mode="embedding")
             except Exception:
                 if hasattr(self.model_tester, "embedding_size"):
-                    x = tf.ones(input_ids.shape + [self.model_tester.embedding_size], dtype=tf.dtypes.float32,)
+                    x = tf.ones(
+                        input_ids.shape + [self.model_tester.embedding_size],
+                        dtype=tf.dtypes.float32,
+                    )
                 else:
-                    x = tf.ones(input_ids.shape + [self.model_tester.hidden_size], dtype=tf.dtypes.float32,)
+                    x = tf.ones(
+                        input_ids.shape + [self.model_tester.hidden_size],
+                        dtype=tf.dtypes.float32,
+                    )
             return x

     def test_inputs_embeds(self):

@@ -700,7 +708,14 @@ class TFModelTesterMixin:
             model.generate(input_ids, do_sample=False, num_return_sequences=3, num_beams=2)

             # num_return_sequences > 1, sample
-            self._check_generated_ids(model.generate(input_ids, do_sample=True, num_beams=2, num_return_sequences=2,))
+            self._check_generated_ids(
+                model.generate(
+                    input_ids,
+                    do_sample=True,
+                    num_beams=2,
+                    num_return_sequences=2,
+                )
+            )

             # num_return_sequences > 1, greedy
             self._check_generated_ids(model.generate(input_ids, do_sample=False, num_beams=2, num_return_sequences=2))

@@ -895,7 +910,8 @@ class UtilsFunctionsTest(unittest.TestCase):
         )
         non_inf_expected_idx = tf.convert_to_tensor(
-            [[0, 0], [0, 9], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 18], [1, 20], [1, 27]], dtype=tf.int32,
+            [[0, 0], [0, 9], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 18], [1, 20], [1, 27]],
+            dtype=tf.int32,
         )  # expected non filtered idx as noted above

         non_inf_expected_output = tf.convert_to_tensor(

@@ -907,7 +923,8 @@ class UtilsFunctionsTest(unittest.TestCase):
         non_inf_output = output[output != -float("inf")]
         non_inf_idx = tf.cast(
-            tf.where(tf.not_equal(output, tf.constant(-float("inf"), dtype=tf.float32))), dtype=tf.int32,
+            tf.where(tf.not_equal(output, tf.constant(-float("inf"), dtype=tf.float32))),
+            dtype=tf.int32,
         )

         tf.debugging.assert_near(non_inf_output, non_inf_expected_output, rtol=1e-12)
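The generate hunk above also shows how the rule propagates outward through nesting: the trailing comma sits in the inner model.generate(...) call, so that call explodes, and the now-multiline argument forces the enclosing self._check_generated_ids(...) call open as well. A sketch with hypothetical stand-ins for the mixin's helpers:

    def check(ids):  # stand-in for self._check_generated_ids
        return ids

    def generate(ids, **kwargs):  # stand-in for model.generate
        return ids

    # source: check(generate([0, 1], do_sample=True, num_beams=2,))
    # black 20.8 output, one bracket level per indent:
    check(
        generate(
            [0, 1],
            do_sample=True,
            num_beams=2,
        )
    )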
tests/test_modeling_tf_ctrl.py

@@ -31,7 +31,8 @@ if is_tf_available():
 class TFCTRLModelTester(object):
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_distilbert.py

@@ -39,7 +39,8 @@ if is_tf_available():
 class TFDistilBertModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_electra.py

@@ -39,7 +39,8 @@ if is_tf_available():
 class TFElectraModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_flaubert.py

@@ -40,7 +40,8 @@ if is_tf_available():
 class TFFlaubertModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13

@@ -337,7 +338,8 @@ class TFFlaubertModelIntegrationTest(unittest.TestCase):
         model = TFFlaubertModel.from_pretrained("jplu/tf-flaubert-small-cased")
         input_ids = tf.convert_to_tensor(
-            [[0, 158, 735, 2592, 1424, 6727, 82, 1]], dtype=tf.int32,
+            [[0, 158, 735, 2592, 1424, 6727, 82, 1]],
+            dtype=tf.int32,
         )  # "J'aime flaubert !"
         output = model(input_ids)[0]
tests/test_modeling_tf_gpt2.py

@@ -37,7 +37,8 @@ if is_tf_available():
 class TFGPT2ModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_longformer.py

@@ -45,7 +45,8 @@ if is_tf_available():
 class TFLongformerModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13

@@ -228,7 +229,8 @@ class TFLongformerModelTester:
         # global attention mask has to be partly defined
         # to trace all weights
         global_attention_mask = tf.concat(
-            [tf.zeros_like(input_ids)[:, :-1], tf.ones_like(input_ids)[:, -1:]], axis=-1,
+            [tf.zeros_like(input_ids)[:, :-1], tf.ones_like(input_ids)[:, -1:]],
+            axis=-1,
         )

         inputs_dict = {

@@ -267,7 +269,13 @@ class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase):
     test_torchscript = False
     all_model_classes = (
-        (TFLongformerModel, TFLongformerForMaskedLM, TFLongformerForQuestionAnswering,) if is_tf_available() else ()
+        (
+            TFLongformerModel,
+            TFLongformerForMaskedLM,
+            TFLongformerForQuestionAnswering,
+        )
+        if is_tf_available()
+        else ()
     )

     def setUp(self):
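The all_model_classes hunk above combines two behaviours: the trailing comma explodes the inner tuple, and once the expression is multi-line black places the if and else parts of the surrounding conditional expression on lines of their own. A sketch under those assumptions, with hypothetical classes:

    class ModelA: ...
    class ModelB: ...

    def is_available():  # stand-in for is_tf_available()
        return True

    all_model_classes = (
        (
            ModelA,
            ModelB,
        )
        if is_available()
        else ()
    )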
tests/test_modeling_tf_openai.py

@@ -36,7 +36,8 @@ if is_tf_available():
 class TFOpenAIGPTModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_roberta.py

@@ -40,7 +40,8 @@ if is_tf_available():
 class TFRobertaModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_t5.py

@@ -32,7 +32,8 @@ if is_tf_available():
 class TFT5ModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13

@@ -181,7 +182,10 @@ class TFT5ModelTester:
         # append to next input_ids and attn_mask
         next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
-        attn_mask = tf.concat([attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)], axis=1,)
+        attn_mask = tf.concat(
+            [attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)],
+            axis=1,
+        )

         # get two different outputs
         output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
tests/test_modeling_tf_transfo_xl.py

@@ -32,7 +32,8 @@ if is_tf_available():
 class TFTransfoXLModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_xlm.py

@@ -40,7 +40,8 @@ if is_tf_available():
 class TFXLMModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_xlnet.py

@@ -40,7 +40,8 @@ if is_tf_available():
 class TFXLNetModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13