Unverified commit 14cb5b35, authored by Lysandre Debut, committed by GitHub

Fix slow gpu tests lysandre (#4487)

* There is one missing key in BERT

* Correct device for CamemBERT model

* RoBERTa tokenization adding prefix space

* Style
parent 6dc52c78
@@ -80,8 +80,9 @@ class AutoModelTest(unittest.TestCase):
             model, loading_info = AutoModelForPreTraining.from_pretrained(model_name, output_loading_info=True)
             self.assertIsNotNone(model)
             self.assertIsInstance(model, BertForPreTraining)
-            for value in loading_info.values():
-                self.assertEqual(len(value), 0)
+            for key, value in loading_info.items():
+                # Only one value should not be initialized and in the missing keys.
+                self.assertEqual(len(value), 1 if key == "missing_keys" else 0)

     @slow
     def test_lmhead_model_from_pretrained(self):
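For context on what the changed assertion checks: passing output_loading_info=True makes from_pretrained also return a dict describing how the checkpoint weights mapped onto the model, and the commit accounts for the one head weight that BERT checkpoints do not carry. A minimal sketch of the idea, assuming a transformers version contemporary with this commit and using "bert-base-uncased" purely as an illustrative checkpoint (the "missing_keys" key is the one the test singles out; "unexpected_keys" is shown on the same assumption):

    from transformers import AutoModelForPreTraining

    # Loading a bare BERT checkpoint into a model with a pretraining head:
    # any head weight absent from the checkpoint is reported in "missing_keys"
    # instead of being silently initialized at random.
    model, loading_info = AutoModelForPreTraining.from_pretrained(
        "bert-base-uncased", output_loading_info=True
    )
    print(loading_info["missing_keys"])     # per the test above: exactly one key
    print(loading_info["unexpected_keys"])  # expected to be empty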
@@ -30,6 +30,7 @@ class CamembertModelIntegrationTest(unittest.TestCase):
     @slow
     def test_output_embeds_base_model(self):
         model = CamembertModel.from_pretrained("camembert-base")
+        model.to(torch_device)

         input_ids = torch.tensor(
             [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]], device=torch_device, dtype=torch.long,
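The added model.to(torch_device) line fixes a plain device mismatch: the input ids are created on torch_device, but from_pretrained leaves the weights on the CPU, so the forward pass fails whenever torch_device is a GPU. A minimal sketch of the rule with a hypothetical stand-in module (the Linear layer is not from the test; it only illustrates that parameters and inputs must share a device):

    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = torch.nn.Linear(4, 2)  # hypothetical stand-in for CamembertModel
    model.to(device)               # without this, a CUDA input raises a device-mismatch RuntimeError
    x = torch.randn(1, 4, device=device)
    out = model(x)                 # parameters and input now live on the same device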
@@ -100,9 +100,11 @@ class RobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         text = tokenizer.encode("sequence builders", add_special_tokens=False)
         text_2 = tokenizer.encode("multi-sequence build", add_special_tokens=False)

-        encoded_text_from_decode = tokenizer.encode("sequence builders", add_special_tokens=True)
+        encoded_text_from_decode = tokenizer.encode(
+            "sequence builders", add_special_tokens=True, add_prefix_space=False
+        )
         encoded_pair_from_decode = tokenizer.encode(
-            "sequence builders", "multi-sequence build", add_special_tokens=True
+            "sequence builders", "multi-sequence build", add_special_tokens=True, add_prefix_space=False
         )

         encoded_sentence = tokenizer.build_inputs_with_special_tokens(text)
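The add_prefix_space=False arguments pin down a quirk of RoBERTa's byte-level BPE: a leading space is encoded into the token itself (the "Ġ" marker), so the same string maps to different ids with and without a prefix space. The test therefore needs encode() to produce ids without a prefix space, matching the ids that build_inputs_with_special_tokens wraps. A sketch of the effect, assuming the slow RobertaTokenizer from the transformers version of this commit, where encode() still accepted add_prefix_space directly:

    from transformers import RobertaTokenizer

    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

    with_space = tokenizer.encode("sequence builders", add_special_tokens=False, add_prefix_space=True)
    without_space = tokenizer.encode("sequence builders", add_special_tokens=False, add_prefix_space=False)

    # The leading-space variant maps "sequence" to a different id ("Ġsequence"),
    # so the two encodings are expected to differ.
    print(with_space != without_space)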