Unverified commit 14cb5b35, authored by Lysandre Debut, committed by GitHub

Fix slow gpu tests lysandre (#4487)

* There is one missing key in BERT

* Correct device for CamemBERT model

* RoBERTa tokenization adding prefix space

* Style
parent 6dc52c78
@@ -80,8 +80,9 @@ class AutoModelTest(unittest.TestCase):
             model, loading_info = AutoModelForPreTraining.from_pretrained(model_name, output_loading_info=True)
             self.assertIsNotNone(model)
             self.assertIsInstance(model, BertForPreTraining)
-            for value in loading_info.values():
-                self.assertEqual(len(value), 0)
+            for key, value in loading_info.items():
+                # Only one value should not be initialized and in the missing keys.
+                self.assertEqual(len(value), 1 if key == "missing_keys" else 0)

     @slow
     def test_lmhead_model_from_pretrained(self):
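For context on what the changed assertion checks: passing output_loading_info=True makes from_pretrained also return a dict describing how the checkpoint weights mapped onto the model, and the commit accounts for the one head weight that BERT checkpoints do not carry. A minimal sketch of the idea, assuming a transformers version contemporary with this commit and using "bert-base-uncased" purely as an illustrative checkpoint (the "missing_keys" key is the one the test singles out; "unexpected_keys" is shown on the same assumption):

    from transformers import AutoModelForPreTraining

    # Loading a bare BERT checkpoint into a model with a pretraining head:
    # any head weight absent from the checkpoint is reported in "missing_keys"
    # instead of being silently initialized at random.
    model, loading_info = AutoModelForPreTraining.from_pretrained(
        "bert-base-uncased", output_loading_info=True
    )
    print(loading_info["missing_keys"])     # per the test above: exactly one key
    print(loading_info["unexpected_keys"])  # expected to be empty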
@@ -30,6 +30,7 @@ class CamembertModelIntegrationTest(unittest.TestCase):
     @slow
     def test_output_embeds_base_model(self):
         model = CamembertModel.from_pretrained("camembert-base")
+        model.to(torch_device)

         input_ids = torch.tensor(
             [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]], device=torch_device, dtype=torch.long,
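The added model.to(torch_device) line fixes a plain device mismatch: the input ids are created on torch_device, but from_pretrained leaves the weights on the CPU, so the forward pass fails whenever torch_device is a GPU. A minimal sketch of the rule with a hypothetical stand-in module (the Linear layer is not from the test; it only illustrates that parameters and inputs must share a device):

    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = torch.nn.Linear(4, 2)  # hypothetical stand-in for CamembertModel
    model.to(device)               # without this, a CUDA input raises a device-mismatch RuntimeError
    x = torch.randn(1, 4, device=device)
    out = model(x)                 # parameters and input now live on the same device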
@@ -100,9 +100,11 @@ class RobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         text = tokenizer.encode("sequence builders", add_special_tokens=False)
         text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)

-        encoded_text_from_decode = tokenizer.encode("sequence builders", add_special_tokens=True)
+        encoded_text_from_decode = tokenizer.encode(
+            "sequence builders", add_special_tokens=True, add_prefix_space=False
+        )
         encoded_pair_from_decode = tokenizer.encode(
-            "sequence builders", "multi-sequence build", add_special_tokens=True
+            "sequence builders", "multi-sequence build", add_special_tokens=True, add_prefix_space=False
         )

         encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)
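The add_prefix_space=False arguments pin down a quirk of RoBERTa's byte-level BPE: a leading space is encoded into the token itself (the "Ġ" marker), so the same string maps to different ids with and without a prefix space. The test therefore needs encode() to produce ids without a prefix space, matching the ids that build_inputs_with_special_tokens wraps. A sketch of the effect, assuming the slow RobertaTokenizer from the transformers version of this commit, where encode() still accepted add_prefix_space directly:

    from transformers import RobertaTokenizer

    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

    with_space = tokenizer.encode("sequence builders", add_special_tokens=False, add_prefix_space=True)
    without_space = tokenizer.encode("sequence builders", add_special_tokens=False, add_prefix_space=False)

    # The leading-space variant maps "sequence" to a different id ("Ġsequence"),
    # so the two encodings are expected to differ.
    print(with_space != without_space)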