Unverified commit 1073a2bd, authored by Sylvain Gugger, committed by GitHub
Browse files

Switch `return_dict` to `True` by default. (#8530)

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Run on the real suite

* Fix slow tests
parent 0d0a0785
...@@ -1063,7 +1063,7 @@ class TFBartForConditionalGeneration(TFPretrainedBartModel): ...@@ -1063,7 +1063,7 @@ class TFBartForConditionalGeneration(TFPretrainedBartModel):
TXT = "My friends are <mask> but they eat too many carbs." TXT = "My friends are <mask> but they eat too many carbs."
model = TFBartForConditionalGeneration.from_pretrained(mname) model = TFBartForConditionalGeneration.from_pretrained(mname)
batch = tokenizer([TXT], return_tensors='tf') batch = tokenizer([TXT], return_tensors='tf')
logits = model(inputs=batch.input_ids, return_dict=True).logits logits = model(inputs=batch.input_ids).logits
probs = tf.nn.softmax(logits[0]) probs = tf.nn.softmax(logits[0])
# probs[5] is associated with the mask token # probs[5] is associated with the mask token
""" """
......
...@@ -466,7 +466,7 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder): ...@@ -466,7 +466,7 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder):
>>> from transformers import TFDPRContextEncoder, DPRContextEncoderTokenizer >>> from transformers import TFDPRContextEncoder, DPRContextEncoderTokenizer
>>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base') >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
>>> model = TFDPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', return_dict=True, from_pt=True) >>> model = TFDPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', from_pt=True)
>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"] >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"]
>>> embeddings = model(input_ids).pooler_output >>> embeddings = model(input_ids).pooler_output
""" """
...@@ -565,7 +565,7 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder): ...@@ -565,7 +565,7 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder):
>>> from transformers import TFDPRQuestionEncoder, DPRQuestionEncoderTokenizer >>> from transformers import TFDPRQuestionEncoder, DPRQuestionEncoderTokenizer
>>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base') >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
>>> model = TFDPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base', return_dict=True, from_pt=True) >>> model = TFDPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base', from_pt=True)
>>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"] >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='tf')["input_ids"]
>>> embeddings = model(input_ids).pooler_output >>> embeddings = model(input_ids).pooler_output
""" """
...@@ -663,7 +663,7 @@ class TFDPRReader(TFDPRPretrainedReader): ...@@ -663,7 +663,7 @@ class TFDPRReader(TFDPRPretrainedReader):
>>> from transformers import TFDPRReader, DPRReaderTokenizer >>> from transformers import TFDPRReader, DPRReaderTokenizer
>>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base') >>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
>>> model = TFDPRReader.from_pretrained('facebook/dpr-reader-single-nq-base', return_dict=True, from_pt=True) >>> model = TFDPRReader.from_pretrained('facebook/dpr-reader-single-nq-base', from_pt=True)
>>> encoded_inputs = tokenizer( >>> encoded_inputs = tokenizer(
... questions=["What is love ?"], ... questions=["What is love ?"],
... titles=["Haddaway"], ... titles=["Haddaway"],
......
...@@ -634,7 +634,7 @@ class TFFunnelEncoder(tf.keras.layers.Layer): ...@@ -634,7 +634,7 @@ class TFFunnelEncoder(tf.keras.layers.Layer):
token_type_ids=None, token_type_ids=None,
output_attentions=False, output_attentions=False,
output_hidden_states=False, output_hidden_states=False,
return_dict=False, return_dict=True,
training=False, training=False,
): ):
# The pooling is not implemented on long tensors, so we convert this mask. # The pooling is not implemented on long tensors, so we convert this mask.
...@@ -719,7 +719,7 @@ class TFFunnelDecoder(tf.keras.layers.Layer): ...@@ -719,7 +719,7 @@ class TFFunnelDecoder(tf.keras.layers.Layer):
token_type_ids=None, token_type_ids=None,
output_attentions=False, output_attentions=False,
output_hidden_states=False, output_hidden_states=False,
return_dict=False, return_dict=True,
training=False, training=False,
): ):
upsampled_hidden = upsample( upsampled_hidden = upsample(
......
...@@ -1275,6 +1275,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel): ...@@ -1275,6 +1275,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel):
obj_labels = inputs.pop("obj_labels", obj_labels) obj_labels = inputs.pop("obj_labels", obj_labels)
matched_label = inputs.pop("matched_label", matched_label) matched_label = inputs.pop("matched_label", matched_label)
ans = inputs.pop("ans", ans) ans = inputs.pop("ans", ans)
return_dict = return_dict if return_dict is not None else self.lxmert.return_dict
lxmert_output = self.lxmert( lxmert_output = self.lxmert(
inputs, inputs,
......
...@@ -1022,7 +1022,7 @@ class TFT5Model(TFT5PreTrainedModel): ...@@ -1022,7 +1022,7 @@ class TFT5Model(TFT5PreTrainedModel):
>>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="tf").input_ids # Batch size 1 >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="tf").input_ids # Batch size 1
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids # Batch size 1 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids # Batch size 1
>>> outputs = model(input_ids, decoder_input_ids=decoder_input_ids, return_dict=True) >>> outputs = model(input_ids, decoder_input_ids=decoder_input_ids)
""" """
...@@ -1219,7 +1219,7 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling ...@@ -1219,7 +1219,7 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
>>> from transformers import T5Tokenizer, TFT5ForConditionalGeneration >>> from transformers import T5Tokenizer, TFT5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small', return_dict=True) >>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = TFT5ForConditionalGeneration.from_pretrained('t5-small') >>> model = TFT5ForConditionalGeneration.from_pretrained('t5-small')
>>> inputs = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='tf').input_ids >>> inputs = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='tf').input_ids
......
...@@ -1020,7 +1020,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel): ...@@ -1020,7 +1020,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
>>> import torch >>> import torch
>>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048') >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
>>> model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048', return_dict=True) >>> model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> start_positions = torch.tensor([1]) >>> start_positions = torch.tensor([1])
......
...@@ -46,7 +46,7 @@ class XLMProphetNetEncoder(ProphetNetEncoder): ...@@ -46,7 +46,7 @@ class XLMProphetNetEncoder(ProphetNetEncoder):
>>> import torch >>> import torch
>>> tokenizer = XLMProphetNetTokenizer.from_pretrained('microsoft/xprophetnet-large-wiki100-cased') >>> tokenizer = XLMProphetNetTokenizer.from_pretrained('microsoft/xprophetnet-large-wiki100-cased')
>>> model = XLMProphetNetEncoder.from_pretrained('patrickvonplaten/xprophetnet-large-uncased-standalone', return_dict=True) >>> model = XLMProphetNetEncoder.from_pretrained('patrickvonplaten/xprophetnet-large-uncased-standalone')
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
...@@ -68,7 +68,7 @@ class XLMProphetNetDecoder(ProphetNetDecoder): ...@@ -68,7 +68,7 @@ class XLMProphetNetDecoder(ProphetNetDecoder):
>>> import torch >>> import torch
>>> tokenizer = XLMProphetNetTokenizer.from_pretrained('microsoft/xprophetnet-large-wiki100-cased') >>> tokenizer = XLMProphetNetTokenizer.from_pretrained('microsoft/xprophetnet-large-wiki100-cased')
>>> model = XLMProphetNetDecoder.from_pretrained('patrickvonplaten/xprophetnet-large-uncased-standalone', add_cross_attention=False, return_dict=True) >>> model = XLMProphetNetDecoder.from_pretrained('patrickvonplaten/xprophetnet-large-uncased-standalone', add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
...@@ -93,7 +93,7 @@ class XLMProphetNetModel(ProphetNetModel): ...@@ -93,7 +93,7 @@ class XLMProphetNetModel(ProphetNetModel):
>>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1 >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True) >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
>>> last_hidden_states = outputs.last_hidden_state # main stream hidden states >>> last_hidden_states = outputs.last_hidden_state # main stream hidden states
>>> last_hidden_states_ngram = outputs.last_hidden_state_ngram # predict hidden states >>> last_hidden_states_ngram = outputs.last_hidden_state_ngram # predict hidden states
...@@ -116,7 +116,7 @@ class XLMProphetNetForConditionalGeneration(ProphetNetForConditionalGeneration): ...@@ -116,7 +116,7 @@ class XLMProphetNetForConditionalGeneration(ProphetNetForConditionalGeneration):
>>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1 >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
>>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1 >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True) >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
>>> logits_next_token = outputs.logits # logits to predict next token as usual >>> logits_next_token = outputs.logits # logits to predict next token as usual
>>> logits_ngram_next_tokens = outputs.logits_ngram # logits to predict 2nd, 3rd, ... next tokens >>> logits_ngram_next_tokens = outputs.logits_ngram # logits to predict 2nd, 3rd, ... next tokens
...@@ -136,7 +136,7 @@ class XLMProphetNetForCausalLM(ProphetNetForCausalLM): ...@@ -136,7 +136,7 @@ class XLMProphetNetForCausalLM(ProphetNetForCausalLM):
>>> import torch >>> import torch
>>> tokenizer = XLMProphetNetTokenizer.from_pretrained('microsoft/xprophetnet-large-wiki100-cased') >>> tokenizer = XLMProphetNetTokenizer.from_pretrained('microsoft/xprophetnet-large-wiki100-cased')
>>> model = XLMProphetNetForCausalLM.from_pretrained('patrickvonplaten/xprophetnet-decoder-clm-large-uncased', return_dict=True) >>> model = XLMProphetNetForCausalLM.from_pretrained('patrickvonplaten/xprophetnet-decoder-clm-large-uncased')
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
...@@ -158,7 +158,7 @@ class XLMProphetNetForCausalLM(ProphetNetForCausalLM): ...@@ -158,7 +158,7 @@ class XLMProphetNetForCausalLM(ProphetNetForCausalLM):
... ) ... )
>>> input_ids = tokenizer_enc(ARTICLE, return_tensors="pt").input_ids >>> input_ids = tokenizer_enc(ARTICLE, return_tensors="pt").input_ids
>>> labels = tokenizer_dec("us rejects charges against its ambassador in bolivia", return_tensors="pt").input_ids >>> labels = tokenizer_dec("us rejects charges against its ambassador in bolivia", return_tensors="pt").input_ids
>>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:], return_dict=True) >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:])
>>> loss = outputs.loss >>> loss = outputs.loss
""" """
......
...@@ -1381,7 +1381,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel): ...@@ -1381,7 +1381,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
>>> import torch >>> import torch
>>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
>>> model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased', return_dict=True) >>> model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
>>> # We show how to setup inputs to predict a next token using a bi-directional context. >>> # We show how to setup inputs to predict a next token using a bi-directional context.
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)).unsqueeze(0) # We will predict the masked token >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)).unsqueeze(0) # We will predict the masked token
...@@ -1916,7 +1916,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel): ...@@ -1916,7 +1916,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
>>> import torch >>> import torch
>>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased') >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
>>> model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased', return_dict=True) >>> model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased')
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> start_positions = torch.tensor([1]) >>> start_positions = torch.tensor([1])
......
...@@ -118,7 +118,6 @@ class {{cookiecutter.camelcase_modelname}}ModelTester: ...@@ -118,7 +118,6 @@ class {{cookiecutter.camelcase_modelname}}ModelTester:
type_vocab_size=self.type_vocab_size, type_vocab_size=self.type_vocab_size,
is_decoder=False, is_decoder=False,
initializer_range=self.initializer_range, initializer_range=self.initializer_range,
return_dict=True,
) )
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
......
...@@ -118,7 +118,7 @@ class GenerationTesterMixin: ...@@ -118,7 +118,7 @@ class GenerationTesterMixin:
@staticmethod @staticmethod
def _get_encoder_outputs(model, input_ids, attention_mask, num_interleave=1): def _get_encoder_outputs(model, input_ids, attention_mask, num_interleave=1):
encoder = model.get_encoder() encoder = model.get_encoder()
encoder_outputs = encoder(input_ids, attention_mask=attention_mask, return_dict=True) encoder_outputs = encoder(input_ids, attention_mask=attention_mask)
encoder_outputs["last_hidden_state"] = encoder_outputs.last_hidden_state.repeat_interleave( encoder_outputs["last_hidden_state"] = encoder_outputs.last_hidden_state.repeat_interleave(
num_interleave, dim=0 num_interleave, dim=0
) )
...@@ -344,6 +344,7 @@ class GenerationTesterMixin: ...@@ -344,6 +344,7 @@ class GenerationTesterMixin:
def test_beam_sample_generate(self): def test_beam_sample_generate(self):
for model_class in self.all_generative_model_classes: for model_class in self.all_generative_model_classes:
config, input_ids, attention_mask, max_length = self._get_input_ids_and_config() config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
print("Return dict", config.return_dict)
logits_warper_kwargs, logits_warper = self._get_warper_and_kwargs(num_beams=1) logits_warper_kwargs, logits_warper = self._get_warper_and_kwargs(num_beams=1)
model = model_class(config).to(torch_device) model = model_class(config).to(torch_device)
......
...@@ -102,7 +102,6 @@ class AlbertModelTester: ...@@ -102,7 +102,6 @@ class AlbertModelTester:
type_vocab_size=self.type_vocab_size, type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range, initializer_range=self.initializer_range,
num_hidden_groups=self.num_hidden_groups, num_hidden_groups=self.num_hidden_groups,
return_dict=True,
) )
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
......
...@@ -259,7 +259,6 @@ class BartHeadTests(unittest.TestCase): ...@@ -259,7 +259,6 @@ class BartHeadTests(unittest.TestCase):
eos_token_id=2, eos_token_id=2,
pad_token_id=1, pad_token_id=1,
bos_token_id=0, bos_token_id=0,
return_dict=True,
) )
return config, input_ids, batch_size return config, input_ids, batch_size
...@@ -310,7 +309,6 @@ class BartHeadTests(unittest.TestCase): ...@@ -310,7 +309,6 @@ class BartHeadTests(unittest.TestCase):
encoder_ffn_dim=8, encoder_ffn_dim=8,
decoder_ffn_dim=8, decoder_ffn_dim=8,
max_position_embeddings=48, max_position_embeddings=48,
return_dict=True,
) )
lm_model = BartForConditionalGeneration(config).to(torch_device) lm_model = BartForConditionalGeneration(config).to(torch_device)
context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long().to(torch_device) context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long().to(torch_device)
...@@ -713,6 +711,6 @@ class FastIntegrationTests(unittest.TestCase): ...@@ -713,6 +711,6 @@ class FastIntegrationTests(unittest.TestCase):
padding="longest", padding="longest",
truncation=True, truncation=True,
) )
features = self.xsum_1_1_model.get_encoder()(**batch, return_dict=True).last_hidden_state features = self.xsum_1_1_model.get_encoder()(**batch).last_hidden_state
expected = [[-0.0828, -0.0251, -0.0674], [0.1277, 0.3311, -0.0255], [0.2613, -0.0840, -0.2763]] expected = [[-0.0828, -0.0251, -0.0674], [0.1277, 0.3311, -0.0255], [0.2613, -0.0840, -0.2763]]
assert_tensors_close(features[0, :3, :3], torch.tensor(expected), atol=1e-3) assert_tensors_close(features[0, :3, :3], torch.tensor(expected), atol=1e-3)
...@@ -124,7 +124,6 @@ class BertModelTester: ...@@ -124,7 +124,6 @@ class BertModelTester:
type_vocab_size=self.type_vocab_size, type_vocab_size=self.type_vocab_size,
is_decoder=False, is_decoder=False,
initializer_range=self.initializer_range, initializer_range=self.initializer_range,
return_dict=True,
) )
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
......
...@@ -89,7 +89,6 @@ class BertGenerationEncoderTester: ...@@ -89,7 +89,6 @@ class BertGenerationEncoderTester:
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
is_decoder=False, is_decoder=False,
initializer_range=self.initializer_range, initializer_range=self.initializer_range,
return_dict=True,
) )
return config, input_ids, input_mask, token_labels return config, input_ids, input_mask, token_labels
......
...@@ -31,7 +31,7 @@ if is_torch_available(): ...@@ -31,7 +31,7 @@ if is_torch_available():
class CamembertModelIntegrationTest(unittest.TestCase): class CamembertModelIntegrationTest(unittest.TestCase):
@slow @slow
def test_output_embeds_base_model(self): def test_output_embeds_base_model(self):
model = CamembertModel.from_pretrained("camembert-base", return_dict=True) model = CamembertModel.from_pretrained("camembert-base")
model.to(torch_device) model.to(torch_device)
input_ids = torch.tensor( input_ids = torch.tensor(
......
...@@ -657,7 +657,7 @@ class ModelTesterMixin: ...@@ -657,7 +657,7 @@ class ModelTesterMixin:
model.eval() model.eval()
with torch.no_grad(): with torch.no_grad():
outputs = model(**self._prepare_for_class(inputs_dict, model_class), return_dict=True) outputs = model(**self._prepare_for_class(inputs_dict, model_class))
hidden_states = outputs["hidden_states"] if "hidden_states" in outputs else outputs[-1] hidden_states = outputs["hidden_states"] if "hidden_states" in outputs else outputs[-1]
expected_num_layers = getattr( expected_num_layers = getattr(
......
...@@ -94,7 +94,6 @@ class CTRLModelTester: ...@@ -94,7 +94,6 @@ class CTRLModelTester:
n_ctx=self.max_position_embeddings, n_ctx=self.max_position_embeddings,
# type_vocab_size=self.type_vocab_size, # type_vocab_size=self.type_vocab_size,
# initializer_range=self.initializer_range, # initializer_range=self.initializer_range,
return_dict=True,
) )
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
......
...@@ -148,7 +148,7 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase): ...@@ -148,7 +148,7 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def check_loss_output(self, result): def check_loss_output(self, result):
self.parent.assertListEqual(list(result["loss"].size()), []) self.parent.assertListEqual(list(result.loss.size()), [])
def create_and_check_deberta_model( def create_and_check_deberta_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
...@@ -160,11 +160,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase): ...@@ -160,11 +160,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
sequence_output = model(input_ids, token_type_ids=token_type_ids)[0] sequence_output = model(input_ids, token_type_ids=token_type_ids)[0]
sequence_output = model(input_ids)[0] sequence_output = model(input_ids)[0]
result = {
"sequence_output": sequence_output,
}
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size] list(sequence_output.size()), [self.batch_size, self.seq_length, self.hidden_size]
) )
def create_and_check_deberta_for_sequence_classification( def create_and_check_deberta_for_sequence_classification(
...@@ -174,14 +171,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase): ...@@ -174,14 +171,8 @@ class DebertaModelTest(ModelTesterMixin, unittest.TestCase):
model = DebertaForSequenceClassification(config) model = DebertaForSequenceClassification(config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
loss, logits = model( result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels])
)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
self.check_loss_output(result) self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
......
...@@ -110,7 +110,6 @@ if is_torch_available(): ...@@ -110,7 +110,6 @@ if is_torch_available():
attention_dropout=self.attention_probs_dropout_prob, attention_dropout=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range, initializer_range=self.initializer_range,
return_dict=True,
) )
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
......
...@@ -117,7 +117,6 @@ class DPRModelTester: ...@@ -117,7 +117,6 @@ class DPRModelTester:
type_vocab_size=self.type_vocab_size, type_vocab_size=self.type_vocab_size,
is_decoder=False, is_decoder=False,
initializer_range=self.initializer_range, initializer_range=self.initializer_range,
return_dict=True,
) )
config = DPRConfig(projection_dim=self.projection_dim, **config.to_dict()) config = DPRConfig(projection_dim=self.projection_dim, **config.to_dict())
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment