Commit a75c64d8 authored by Lysandre's avatar Lysandre

Black 20 release

parent e78c1103
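The change is mechanical: Black 20.8 introduced the "magic trailing comma" (a call or literal that already ends with a trailing comma is exploded to one element per line) and began normalizing docstring indentation, which together account for every hunk below. A minimal sketch of the trailing-comma rule, on a made-up snippet rather than code from this commit:

# Before Black 20.8: the call fits on one line, but it ends with a
# trailing comma inside the parentheses.
totals = dict(apples=3, pears=2, plums=7,)

# After Black 20.8: the pre-existing ("magic") trailing comma is taken
# as a signal to keep one argument per line, so the call is exploded.
totals = dict(
    apples=3,
    pears=2,
    plums=7,
)

Removing the trailing comma instead lets Black collapse such a call back onto a single line, which is why only call sites that already carried a trailing comma are touched here.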
@@ -40,7 +40,8 @@ if is_torch_available():
 class LongformerModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...
@@ -217,7 +217,10 @@ class MobileBertModelTester:
         model.to(torch_device)
         model.eval()
         result = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, next_sentence_label=sequence_labels,
+            input_ids,
+            attention_mask=input_mask,
+            token_type_ids=token_type_ids,
+            next_sentence_label=sequence_labels,
         )
         self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))
@@ -397,7 +400,11 @@ class MobileBertModelTest(ModelTesterMixin, unittest.TestCase):
 def _long_tensor(tok_lst):
-    return torch.tensor(tok_lst, dtype=torch.long, device=torch_device,)
+    return torch.tensor(
+        tok_lst,
+        dtype=torch.long,
+        device=torch_device,
+    )

 TOLERANCE = 1e-3
...
@@ -37,7 +37,8 @@ if is_torch_available():
 class OpenAIGPTModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
    ):
         self.parent = parent
         self.batch_size = 13
...
@@ -240,14 +240,19 @@ class ReformerModelTester:
         half_input_ids = input_ids[:, :half_seq_len]

         # normal padded
-        attn_mask = torch.cat([torch.ones_like(half_input_ids), torch.zeros_like(half_input_ids)], dim=-1,)
+        attn_mask = torch.cat(
+            [torch.ones_like(half_input_ids), torch.zeros_like(half_input_ids)],
+            dim=-1,
+        )
         input_ids_padded = torch.cat(
-            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)], dim=-1,
+            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)],
+            dim=-1,
         )

         # shifted padded
         input_ids_roll = torch.cat(
-            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)], dim=-1,
+            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)],
+            dim=-1,
         )
         input_ids_roll = torch.roll(input_ids_roll, roll, dims=-1)
         attn_mask_roll = torch.roll(attn_mask, roll, dims=-1)
@@ -283,13 +288,21 @@ class ReformerModelTester:
         torch.manual_seed(layer.attention_seed)
         attn_outputs = layer.attention(hidden_states, attention_mask=input_mask)
         self.parent.assertTrue(
-            torch.allclose(prev_attn_output + attn_outputs.hidden_states, next_attn_output, atol=1e-3,)
+            torch.allclose(
+                prev_attn_output + attn_outputs.hidden_states,
+                next_attn_output,
+                atol=1e-3,
+            )
         )

         torch.manual_seed(layer.feed_forward_seed)
         feed_forward_hidden_states = layer.feed_forward(next_attn_output)
         self.parent.assertTrue(
-            torch.allclose(next_hidden_states, hidden_states + feed_forward_hidden_states, atol=1e-3,)
+            torch.allclose(
+                next_hidden_states,
+                hidden_states + feed_forward_hidden_states,
+                atol=1e-3,
+            )
         )

     def create_and_check_reformer_feed_backward_chunking(self, config, input_ids, input_mask, choice_labels):
@@ -416,7 +429,10 @@ class ReformerModelTester:
         model.to(torch_device)
         model.eval()
         result = model(
-            input_ids, attention_mask=input_mask, start_positions=choice_labels, end_positions=choice_labels,
+            input_ids,
+            attention_mask=input_mask,
+            start_positions=choice_labels,
+            end_positions=choice_labels,
         )
         self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
         self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
@@ -468,7 +484,7 @@ class ReformerModelTester:
 class ReformerTesterMixin:
     """
-        Reformer Local and Reformer LSH run essentially the same tests
+    Reformer Local and Reformer LSH run essentially the same tests
     """

     def test_config(self):
@@ -887,7 +903,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         reformer_output = layer(prev_attn_output=hidden_states.clone(), hidden_states=hidden_states)
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.6879, -1.3083, -0.4708, 1.3555, -0.6292], dtype=torch.float, device=torch_device,
+            [1.6879, -1.3083, -0.4708, 1.3555, -0.6292],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
@@ -902,11 +920,15 @@ class ReformerIntegrationTests(unittest.TestCase):
         layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
         layer.eval()
         reformer_output = layer(
-            prev_attn_output=hidden_states.clone(), hidden_states=hidden_states, attention_mask=attn_mask,
+            prev_attn_output=hidden_states.clone(),
+            hidden_states=hidden_states,
+            attention_mask=attn_mask,
         )
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.6439, -1.2306, -0.5108, 1.3006, -0.6537], dtype=torch.float, device=torch_device,
+            [1.6439, -1.2306, -0.5108, 1.3006, -0.6537],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
@@ -922,7 +944,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         reformer_output = layer(prev_attn_output=hidden_states, hidden_states=hidden_states)
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.4212, -2.0576, -0.9688, 1.4599, -0.1344], dtype=torch.float, device=torch_device,
+            [1.4212, -2.0576, -0.9688, 1.4599, -0.1344],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
@@ -935,10 +959,16 @@ class ReformerIntegrationTests(unittest.TestCase):
         torch.manual_seed(0)
         layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
         layer.eval()
-        reformer_output = layer(prev_attn_output=hidden_states, hidden_states=hidden_states, attention_mask=attn_mask,)
+        reformer_output = layer(
+            prev_attn_output=hidden_states,
+            hidden_states=hidden_states,
+            attention_mask=attn_mask,
+        )
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.4750, -2.0235, -0.9743, 1.4463, -0.1269], dtype=torch.float, device=torch_device,
+            [1.4750, -2.0235, -0.9743, 1.4463, -0.1269],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
@@ -953,7 +983,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
         output_slice = hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [-0.9896, -0.9396, -1.0831, -0.0597, 0.2456], dtype=torch.float, device=torch_device,
+            [-0.9896, -0.9396, -1.0831, -0.0597, 0.2456],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
@@ -967,7 +999,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
         output_slice = hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [-1.6791, 0.7171, 0.1594, 0.4063, 1.2584], dtype=torch.float, device=torch_device,
+            [-1.6791, 0.7171, 0.1594, 0.4063, 1.2584],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
@@ -983,7 +1017,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
         output_slice = hidden_states[1, -1, :5]
         expected_output_slice = torch.tensor(
-            [0.0256, -0.0121, 0.0636, 0.0024, -0.0393], dtype=torch.float, device=torch_device,
+            [0.0256, -0.0121, 0.0636, 0.0024, -0.0393],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
@@ -1005,15 +1041,21 @@ class ReformerIntegrationTests(unittest.TestCase):
         # check last grads to cover all proable errors
         grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
         expected_grad_slice_word = torch.tensor(
-            [-0.0005, 0.0001, 0.0002, 0.0003, 0.0006], dtype=torch.float, device=torch_device,
+            [-0.0005, 0.0001, 0.0002, 0.0003, 0.0006],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
         expected_grad_slice_pos_fac_1 = torch.tensor(
-            [0.0037, -1.3793, -1.0231, -1.5230, -2.5306], dtype=torch.float, device=torch_device,
+            [0.0037, -1.3793, -1.0231, -1.5230, -2.5306],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
         expected_grad_slice_pos_fac_2 = torch.tensor(
-            [-1.3165, 0.5168, 0.7785, 1.0811, -0.9830], dtype=torch.float, device=torch_device,
+            [-1.3165, 0.5168, 0.7785, 1.0811, -0.9830],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(grad_slice_word, expected_grad_slice_word, atol=1e-3))
         self.assertTrue(torch.allclose(grad_slice_position_factor_1, expected_grad_slice_pos_fac_1, atol=1e-3))
@@ -1038,15 +1080,21 @@ class ReformerIntegrationTests(unittest.TestCase):
         # check last grads to cover all proable errors
         grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
         expected_grad_slice_word = torch.tensor(
-            [2.6357e-05, 4.3358e-04, -8.4985e-04, 1.0094e-04, 3.8954e-04], dtype=torch.float, device=torch_device,
+            [2.6357e-05, 4.3358e-04, -8.4985e-04, 1.0094e-04, 3.8954e-04],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
         expected_grad_slice_pos_fac_1 = torch.tensor(
-            [-0.0984, 0.6283, 0.4282, 1.2960, 0.6897], dtype=torch.float, device=torch_device,
+            [-0.0984, 0.6283, 0.4282, 1.2960, 0.6897],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
         expected_grad_slice_pos_fac_2 = torch.tensor(
-            [0.4626, -0.0231, -0.0172, 0.1081, 0.3805], dtype=torch.float, device=torch_device,
+            [0.4626, -0.0231, -0.0172, 0.1081, 0.3805],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(grad_slice_word, expected_grad_slice_word, atol=1e-3))
         self.assertTrue(torch.allclose(grad_slice_position_factor_1, expected_grad_slice_pos_fac_1, atol=1e-3))
...
@@ -45,7 +45,8 @@ if is_torch_available():
 class RobertaModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
@@ -352,7 +353,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertIsNotNone(model)

     def test_create_position_ids_respects_padding_index(self):
-        """ Ensure that the default position ids only assign a sequential . This is a regression
+        """Ensure that the default position ids only assign a sequential . This is a regression
         test for https://github.com/huggingface/transformers/issues/1761

         The position ids should be masked with the embedding object's padding index. Therefore, the
@@ -371,7 +372,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))

     def test_create_position_ids_from_inputs_embeds(self):
-        """ Ensure that the default position ids only assign a sequential . This is a regression
+        """Ensure that the default position ids only assign a sequential . This is a regression
         test for https://github.com/huggingface/transformers/issues/1761

         The position ids should be masked with the embedding object's padding index. Therefore, the
...
@@ -101,7 +101,13 @@ class T5ModelTester:
         )

     def check_prepare_lm_labels_via_shift_left(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config)
         model.to(torch_device)
@@ -134,7 +140,13 @@ class T5ModelTester:
         self.parent.assertListEqual(decoder_input_ids_slice[1:].tolist(), lm_labels_slice[:-1].tolist())

     def create_and_check_model(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config)
         model.to(torch_device)
@@ -160,7 +172,13 @@ class T5ModelTester:
         self.parent.assertEqual(len(decoder_past[1][0]), 4)

     def create_and_check_with_lm_head(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5ForConditionalGeneration(config=config).to(torch_device).eval()
         outputs = model(
@@ -174,7 +192,13 @@ class T5ModelTester:
         self.parent.assertEqual(outputs["loss"].size(), ())

     def create_and_check_decoder_model_past(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config).get_decoder().to(torch_device).eval()
         # first forward pass
@@ -205,7 +229,13 @@ class T5ModelTester:
         self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

     def create_and_check_decoder_model_attention_mask_past(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config).get_decoder()
         model.to(torch_device)
@@ -231,7 +261,8 @@ class T5ModelTester:
         # append to next input_ids and attn_mask
         next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
         attn_mask = torch.cat(
-            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1,
+            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)],
+            dim=1,
         )

         # get two different outputs
@@ -249,7 +280,13 @@ class T5ModelTester:
         self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

     def create_and_check_generate_with_past_key_value_states(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5ForConditionalGeneration(config=config).to(torch_device).eval()
         torch.manual_seed(0)
@@ -261,14 +298,26 @@ class T5ModelTester:
         self.parent.assertTrue(torch.all(output_with_past_cache == output_without_past_cache))

     def create_and_check_model_fp16_forward(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config).to(torch_device).half().eval()
         output = model(input_ids, decoder_input_ids=input_ids, attention_mask=attention_mask)["last_hidden_state"]
         self.parent.assertFalse(torch.isnan(output).any().item())

     def create_and_check_encoder_decoder_shared_weights(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         for model_class in [T5Model, T5ForConditionalGeneration]:
             torch.manual_seed(0)
@@ -339,7 +388,14 @@ class T5ModelTester:
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
-        (config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,) = config_and_inputs
+        (
+            config,
+            input_ids,
+            decoder_input_ids,
+            attention_mask,
+            decoder_attention_mask,
+            lm_labels,
+        ) = config_and_inputs

         inputs_dict = {
             "input_ids": input_ids,
@@ -412,7 +468,11 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
         model = T5Model(config_and_inputs[0]).to(torch_device)
         with tempfile.TemporaryDirectory() as tmpdirname:
             torch.onnx.export(
-                model, config_and_inputs[1], f"{tmpdirname}/t5_test.onnx", export_params=True, opset_version=9,
+                model,
+                config_and_inputs[1],
+                f"{tmpdirname}/t5_test.onnx",
+                export_params=True,
+                opset_version=9,
             )
@@ -469,7 +529,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
         )
         decoded = tok.batch_decode(hypotheses_batch, skip_special_tokens=True, clean_up_tokenization_spaces=False)
         self.assertListEqual(
-            expected_summaries, decoded,
+            expected_summaries,
+            decoded,
         )

     @slow
...
@@ -33,7 +33,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
         model = TFCamembertModel.from_pretrained("jplu/tf-camembert-base")
         input_ids = tf.convert_to_tensor(
-            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]], dtype=tf.int32,
+            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]],
+            dtype=tf.int32,
         )  # J'aime le camembert !"
         output = model(input_ids)["last_hidden_state"]
@@ -41,7 +42,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
         self.assertEqual(output.shape, expected_shape)
         # compare the actual values for a slice.
         expected_slice = tf.convert_to_tensor(
-            [[[-0.0254, 0.0235, 0.1027], [0.0606, -0.1811, -0.0418], [-0.1561, -0.1127, 0.2687]]], dtype=tf.float32,
+            [[[-0.0254, 0.0235, 0.1027], [0.0606, -0.1811, -0.0418], [-0.1561, -0.1127, 0.2687]]],
+            dtype=tf.float32,
         )
         # camembert = torch.hub.load('pytorch/fairseq', 'camembert.v0')
         # camembert.eval()
...
@@ -155,7 +155,8 @@ class TFModelTesterMixin:
             self.assertEqual(len(outputs), num_out)
             self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
             self.assertListEqual(
-                list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size],
+                list(hidden_states[0].shape[-2:]),
+                [self.model_tester.seq_length, self.model_tester.hidden_size],
             )

     @slow
@@ -486,7 +487,8 @@ class TFModelTesterMixin:
             hidden_states = [t.numpy() for t in outputs[-1]]
             self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
             self.assertListEqual(
-                list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size],
+                list(hidden_states[0].shape[-2:]),
+                [self.model_tester.seq_length, self.model_tester.hidden_size],
             )

         for model_class in self.all_model_classes:
@@ -591,9 +593,15 @@ class TFModelTesterMixin:
                 x = wte([input_ids, None, None, None], mode="embedding")
            except Exception:
                if hasattr(self.model_tester, "embedding_size"):
-                    x = tf.ones(input_ids.shape + [self.model_tester.embedding_size], dtype=tf.dtypes.float32,)
+                    x = tf.ones(
+                        input_ids.shape + [self.model_tester.embedding_size],
+                        dtype=tf.dtypes.float32,
+                    )
                else:
-                    x = tf.ones(input_ids.shape + [self.model_tester.hidden_size], dtype=tf.dtypes.float32,)
+                    x = tf.ones(
+                        input_ids.shape + [self.model_tester.hidden_size],
+                        dtype=tf.dtypes.float32,
+                    )
            return x

     def test_inputs_embeds(self):
@@ -700,7 +708,14 @@ class TFModelTesterMixin:
                model.generate(input_ids, do_sample=False, num_return_sequences=3, num_beams=2)

            # num_return_sequences > 1, sample
-            self._check_generated_ids(model.generate(input_ids, do_sample=True, num_beams=2, num_return_sequences=2,))
+            self._check_generated_ids(
+                model.generate(
+                    input_ids,
+                    do_sample=True,
+                    num_beams=2,
+                    num_return_sequences=2,
+                )
+            )
            # num_return_sequences > 1, greedy
            self._check_generated_ids(model.generate(input_ids, do_sample=False, num_beams=2, num_return_sequences=2))
@@ -895,7 +910,8 @@ class UtilsFunctionsTest(unittest.TestCase):
         )

         non_inf_expected_idx = tf.convert_to_tensor(
-            [[0, 0], [0, 9], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 18], [1, 20], [1, 27]], dtype=tf.int32,
+            [[0, 0], [0, 9], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 18], [1, 20], [1, 27]],
+            dtype=tf.int32,
         )  # expected non filtered idx as noted above

         non_inf_expected_output = tf.convert_to_tensor(
@@ -907,7 +923,8 @@ class UtilsFunctionsTest(unittest.TestCase):
         non_inf_output = output[output != -float("inf")]
         non_inf_idx = tf.cast(
-            tf.where(tf.not_equal(output, tf.constant(-float("inf"), dtype=tf.float32))), dtype=tf.int32,
+            tf.where(tf.not_equal(output, tf.constant(-float("inf"), dtype=tf.float32))),
+            dtype=tf.int32,
         )

         tf.debugging.assert_near(non_inf_output, non_inf_expected_output, rtol=1e-12)
...
@@ -31,7 +31,8 @@ if is_tf_available():
 class TFCTRLModelTester(object):
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...
@@ -39,7 +39,8 @@ if is_tf_available():
 class TFDistilBertModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...
@@ -39,7 +39,8 @@ if is_tf_available():
 class TFElectraModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...
@@ -40,7 +40,8 @@ if is_tf_available():
 class TFFlaubertModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
@@ -337,7 +338,8 @@ class TFFlaubertModelIntegrationTest(unittest.TestCase):
         model = TFFlaubertModel.from_pretrained("jplu/tf-flaubert-small-cased")
         input_ids = tf.convert_to_tensor(
-            [[0, 158, 735, 2592, 1424, 6727, 82, 1]], dtype=tf.int32,
+            [[0, 158, 735, 2592, 1424, 6727, 82, 1]],
+            dtype=tf.int32,
         )  # "J'aime flaubert !"
         output = model(input_ids)[0]
...
@@ -37,7 +37,8 @@ if is_tf_available():
 class TFGPT2ModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...
@@ -36,7 +36,7 @@ if is_tf_available():
 def shape_list(x):
     """
-        copied from transformers.modeling_tf_utils
+    copied from transformers.modeling_tf_utils
     """
     static = x.shape.as_list()
     dynamic = tf.shape(x)
@@ -45,7 +45,8 @@ if is_tf_available():
 class TFLongformerModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
@@ -228,7 +229,8 @@ class TFLongformerModelTester:
         # global attention mask has to be partly defined
         # to trace all weights
         global_attention_mask = tf.concat(
-            [tf.zeros_like(input_ids)[:, :-1], tf.ones_like(input_ids)[:, -1:]], axis=-1,
+            [tf.zeros_like(input_ids)[:, :-1], tf.ones_like(input_ids)[:, -1:]],
+            axis=-1,
         )

         inputs_dict = {
@@ -267,7 +269,13 @@ class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase):
     test_torchscript = False

     all_model_classes = (
-        (TFLongformerModel, TFLongformerForMaskedLM, TFLongformerForQuestionAnswering,) if is_tf_available() else ()
+        (
+            TFLongformerModel,
+            TFLongformerForMaskedLM,
+            TFLongformerForQuestionAnswering,
+        )
+        if is_tf_available()
+        else ()
     )

     def setUp(self):
...
@@ -36,7 +36,8 @@ if is_tf_available():
 class TFOpenAIGPTModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...
@@ -40,7 +40,8 @@ if is_tf_available():
 class TFRobertaModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...
@@ -32,7 +32,8 @@ if is_tf_available():
 class TFT5ModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
@@ -181,7 +182,10 @@ class TFT5ModelTester:
         # append to next input_ids and attn_mask
         next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
-        attn_mask = tf.concat([attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)], axis=1,)
+        attn_mask = tf.concat(
+            [attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)],
+            axis=1,
+        )

         # get two different outputs
         output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
...
@@ -32,7 +32,8 @@ if is_tf_available():
 class TFTransfoXLModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...
@@ -40,7 +40,8 @@ if is_tf_available():
 class TFXLMModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...
@@ -40,7 +40,8 @@ if is_tf_available():
 class TFXLNetModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
...