Unverified commit 1073a2bd, authored by Sylvain Gugger, committed by GitHub

Switch `return_dict` to `True` by default. (#8530)

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Run on the real suite

* Fix slow tests
parent 0d0a0785
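In practice, the flip means a model's forward pass now returns a `ModelOutput` subclass with named attributes instead of a plain tuple, unless the caller opts out. A minimal sketch of the new default behavior (BERT is used purely as an illustration; any model in the library behaves the same way):

```python
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

# New default: the forward pass returns a ModelOutput with named attributes...
outputs = model(**inputs)
hidden = outputs.last_hidden_state

# ...which still supports tuple-style indexing for backward compatibility.
assert outputs[0] is outputs.last_hidden_state

# The old tuple return can be restored per call, or for the whole model:
tuple_outputs = model(**inputs, return_dict=False)
# model = BertModel.from_pretrained("bert-base-uncased", return_dict=False)
```

This is also why the docstring examples in the diff below simply drop `return_dict=True` from `from_pretrained(...)`: the argument became redundant.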
@@ -329,7 +329,7 @@ class BartEncoder(nn.Module):
         self.layer_norm = LayerNorm(config.d_model) if config.add_final_layer_norm else None
     def forward(
-        self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=False
+        self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=True
     ):
         """
         Args:
@@ -528,7 +528,7 @@ class BartDecoder(nn.Module):
         use_cache=False,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
         **unused,
     ):
         """
@@ -446,7 +446,7 @@ class BertEncoder(nn.Module):
         encoder_attention_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         all_hidden_states = () if output_hidden_states else None
         all_self_attentions = () if output_attentions else None
@@ -920,7 +920,7 @@ class BertForPreTraining(BertPreTrainedModel):
         >>> import torch
         >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForPreTraining.from_pretrained('bert-base-uncased', return_dict=True)
+        >>> model = BertForPreTraining.from_pretrained('bert-base-uncased')
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
         >>> outputs = model(**inputs)
@@ -1036,7 +1036,7 @@ class BertLMHeadModel(BertPreTrainedModel):
         >>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
         >>> config = BertConfig.from_pretrained("bert-base-cased")
         >>> config.is_decoder = True
-        >>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config, return_dict=True)
+        >>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config)
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
         >>> outputs = model(**inputs)
@@ -1250,7 +1250,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
         >>> import torch
         >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased', return_dict=True)
+        >>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
         >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
         >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
@@ -463,7 +463,7 @@ class BertGenerationDecoder(BertGenerationPreTrainedModel):
         >>> tokenizer = BertGenerationTokenizer.from_pretrained('google/bert_for_seq_generation_L-24_bbc_encoder')
         >>> config = BertGenerationConfig.from_pretrained("google/bert_for_seq_generation_L-24_bbc_encoder")
         >>> config.is_decoder = True
-        >>> model = BertGenerationDecoder.from_pretrained('google/bert_for_seq_generation_L-24_bbc_encoder', config=config, return_dict=True)
+        >>> model = BertGenerationDecoder.from_pretrained('google/bert_for_seq_generation_L-24_bbc_encoder', config=config)
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
         >>> outputs = model(**inputs)
@@ -384,7 +384,7 @@ class DebertaEncoder(nn.Module):
         output_attentions=False,
         query_states=None,
         relative_pos=None,
-        return_dict=False,
+        return_dict=True,
     ):
         attention_mask = self.get_attention_mask(attention_mask)
         relative_pos = self.get_rel_pos(hidden_states, query_states, relative_pos)
@@ -885,7 +885,7 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):
         >>> import torch
         >>> tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
-        >>> model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-cased', return_dict=True)
+        >>> model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-cased')
         >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
         >>> choice0 = "It is eaten with a fork and a knife."
@@ -455,7 +455,7 @@ class DPRContextEncoder(DPRPretrainedContextEncoder):
         >>> from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
         >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
-        >>> model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', return_dict=True)
+        >>> model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
         >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
         >>> embeddings = model(input_ids).pooler_output
     """
@@ -533,7 +533,7 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
         >>> from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
         >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
-        >>> model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base', return_dict=True)
+        >>> model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
         >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
         >>> embeddings = model(input_ids).pooler_output
     """
@@ -609,7 +609,7 @@ class DPRReader(DPRPretrainedReader):
         >>> from transformers import DPRReader, DPRReaderTokenizer
         >>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
-        >>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base', return_dict=True)
+        >>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base')
         >>> encoded_inputs = tokenizer(
         ...     questions=["What is love ?"],
         ...     titles=["Haddaway"],
@@ -442,7 +442,7 @@ class ElectraEncoder(nn.Module):
         encoder_attention_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         all_hidden_states = () if output_hidden_states else None
         all_self_attentions = () if output_attentions else None
@@ -370,7 +370,7 @@ class EncoderDecoderModel(PreTrainedModel):
         >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
         >>> # training
-        >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids, return_dict=True)
+        >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
         >>> loss, logits = outputs.loss, outputs.logits
         >>> # save and load from pretrained
@@ -434,7 +434,7 @@ class FSMTEncoder(nn.Module):
         ) # type: List[EncoderLayer]
     def forward(
-        self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=False
+        self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=True
     ):
         """
         Args:
@@ -617,7 +617,7 @@ class FSMTDecoder(nn.Module):
         use_cache=False,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
         **unused,
     ):
         """
@@ -619,7 +619,7 @@ class FunnelEncoder(nn.Module):
         token_type_ids=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         # The pooling is not implemented on long tensors, so we convert this mask.
         attention_mask = attention_mask.type_as(inputs_embeds)
@@ -698,7 +698,7 @@ class FunnelDecoder(nn.Module):
         token_type_ids=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         upsampled_hidden = upsample(
             final_hidden,
@@ -1111,7 +1111,7 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
         >>> import torch
         >>> tokenizer = FunnelTokenizer.from_pretrained('funnel-transformer/small')
-        >>> model = FunnelForPreTraining.from_pretrained('funnel-transformer/small', return_dict=True)
+        >>> model = FunnelForPreTraining.from_pretrained('funnel-transformer/small')
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors= "pt")
         >>> logits = model(**inputs).logits
@@ -911,7 +911,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
         >>> from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel
         >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-        >>> model = GPT2DoubleHeadsModel.from_pretrained('gpt2, return_dict=True)
+        >>> model = GPT2DoubleHeadsModel.from_pretrained('gpt2')
         >>> # Add a [CLS] to the vocabulary (we should train it also!)
         >>> num_added_tokens = tokenizer.add_special_tokens({'cls_token': '[CLS]'})
@@ -376,7 +376,7 @@ class LayoutLMEncoder(nn.Module):
         encoder_attention_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         all_hidden_states = () if output_hidden_states else None
         all_self_attentions = () if output_attentions else None
@@ -1050,7 +1050,7 @@ class LongformerEncoder(nn.Module):
         attention_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         is_index_masked = attention_mask < 0
@@ -1388,7 +1388,7 @@ class LongformerModel(LongformerPreTrainedModel):
         >>> import torch
         >>> from transformers import LongformerModel, LongformerTokenizer
-        >>> model = LongformerModel.from_pretrained('allenai/longformer-base-4096', return_dict=True)
+        >>> model = LongformerModel.from_pretrained('allenai/longformer-base-4096')
         >>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
         >>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
@@ -1526,7 +1526,7 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
         >>> import torch
         >>> from transformers import LongformerForMaskedLM, LongformerTokenizer
-        >>> model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096', return_dict=True)
+        >>> model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096')
         >>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
         >>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
@@ -1742,7 +1742,7 @@ class LongformerForQuestionAnswering(LongformerPreTrainedModel):
         >>> import torch
         >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
-        >>> model = LongformerForQuestionAnswering.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa", return_dict=True)
+        >>> model = LongformerForQuestionAnswering.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
         >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
         >>> encoding = tokenizer(question, text, return_tensors="pt")
@@ -558,7 +558,7 @@ class MobileBertEncoder(nn.Module):
         encoder_attention_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         all_hidden_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None
@@ -1006,7 +1006,7 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
         >>> import torch
         >>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
-        >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased", return_dict=True)
+        >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
         >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
         >>> outputs = model(input_ids)
@@ -1216,7 +1216,7 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel):
         >>> import torch
         >>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
-        >>> model = MobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased', return_dict=True)
+        >>> model = MobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased')
         >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
         >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
@@ -670,7 +670,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         >>> import torch
         >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
-        >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt', return_dict=True)
+        >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
         >>> tokenizer.add_special_tokens({'cls_token': '[CLS]'}) # Add a [CLS] to the vocabulary (we should train it also!)
         >>> model.resize_token_embeddings(len(tokenizer))
@@ -1180,7 +1180,7 @@ class ProphetNetEncoder(ProphetNetPreTrainedModel):
         >>> import torch
         >>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
-        >>> model = ProphetNetEncoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone', return_dict=True)
+        >>> model = ProphetNetEncoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone')
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
         >>> outputs = model(**inputs)
@@ -1319,7 +1319,7 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel):
         >>> import torch
         >>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
-        >>> model = ProphetNetDecoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone', add_cross_attention=False, return_dict=True)
+        >>> model = ProphetNetDecoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone', add_cross_attention=False)
         >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
         >>> outputs = model(**inputs)
@@ -1621,7 +1621,7 @@ class ProphetNetModel(ProphetNetPreTrainedModel):
         >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
         >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
-        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
         >>> last_hidden_states = outputs.last_hidden_state # main stream hidden states
         >>> last_hidden_states_ngram = outputs.last_hidden_state_ngram # predict hidden states
@@ -1731,7 +1731,7 @@ class ProphetNetForConditionalGeneration(ProphetNetPreTrainedModel):
         >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
         >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
-        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
         >>> logits_next_token = outputs.logits # logits to predict next token as usual
         >>> logits_ngram_next_tokens = outputs.logits_ngram # logits to predict 2nd, 3rd, ... next tokens
@@ -1940,10 +1940,10 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel):
         >>> import torch
         >>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
-        >>> model = ProphetNetForCausalLM.from_pretrained('patrickvonplaten/prophetnet-decoder-clm-large-uncased', return_dict=True)
+        >>> model = ProphetNetForCausalLM.from_pretrained('patrickvonplaten/prophetnet-decoder-clm-large-uncased')
         >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
-        >>> outputs = model(**inputs, return_dict=True)
+        >>> outputs = model(**inputs)
         >>> logits = outputs.logits
@@ -1962,7 +1962,7 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel):
         ... )
         >>> input_ids = tokenizer_enc(ARTICLE, return_tensors="pt").input_ids
         >>> labels = tokenizer_dec("us rejects charges against its ambassador in bolivia", return_tensors="pt").input_ids
-        >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:], return_dict=True)
+        >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:])
         >>> loss = outputs.loss
     """
@@ -558,9 +558,7 @@ class RagModel(RagPreTrainedModel):
         if encoder_outputs is None:
             if has_to_retrieve:
-                question_enc_outputs = self.question_encoder(
-                    input_ids, attention_mask=attention_mask, return_dict=True
-                )
+                question_enc_outputs = self.question_encoder(input_ids, attention_mask=attention_mask)
                 question_encoder_last_hidden_state = question_enc_outputs[0] # hidden states of question encoder
                 retriever_outputs = self.retriever(
@@ -620,7 +618,6 @@
             decoder_attention_mask=decoder_attention_mask,
             past_key_values=past_key_values,
             use_cache=use_cache,
-            return_dict=True,
         )
         if not has_to_retrieve:
@@ -1364,7 +1361,7 @@ class RagTokenForGeneration(RagPreTrainedModel):
         batch_size = context_input_ids.shape[0] // n_docs
         encoder = self.rag.generator.get_encoder()
-        encoder_outputs = encoder(input_ids=context_input_ids, attention_mask=context_attention_mask, return_dict=True)
+        encoder_outputs = encoder(input_ids=context_input_ids, attention_mask=context_attention_mask)
         input_ids = torch.full(
             (batch_size * num_beams, 1),
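The RAG hunks above are the inverse case: explicit `return_dict=True` at internal call sites becomes redundant once the callee defaults to it. A runnable sketch of the same pattern with a standalone DPR question encoder (the model RAG composes internally; checkpoint name as in the DPR examples above):

```python
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer

tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
model = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")

input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]

# Before this commit the call needed return_dict=True to expose .pooler_output;
# now the named attribute (and outputs[0]) is available by default.
outputs = model(input_ids)
embeddings = outputs.pooler_output
```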
@@ -390,7 +390,7 @@ class RobertaEncoder(nn.Module):
         encoder_attention_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         all_hidden_states = () if output_hidden_states else None
         all_self_attentions = () if output_attentions else None
@@ -770,7 +770,7 @@ class RobertaForCausalLM(RobertaPreTrainedModel):
         >>> import torch
         >>> tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
-        >>> config = RobertaConfig.from_pretrained("roberta-base", return_dict=True)
+        >>> config = RobertaConfig.from_pretrained("roberta-base")
         >>> config.is_decoder = True
         >>> model = RobertaForCausalLM.from_pretrained('roberta-base', config=config)
@@ -314,7 +314,7 @@ class SqueezeBertEncoder(nn.Module):
         head_mask=None,
         output_attentions=False,
         output_hidden_states=False,
-        return_dict=False,
+        return_dict=True,
     ):
         if head_mask is None:
@@ -534,7 +534,7 @@ class T5Block(nn.Module):
         past_key_value=None,
         use_cache=False,
         output_attentions=False,
-        return_dict=False,
+        return_dict=True,
     ):
         if past_key_value is not None:
@@ -1022,7 +1022,7 @@ class T5Model(T5PreTrainedModel):
         >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
         >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
-        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+        >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
         >>> last_hidden_states = outputs.last_hidden_state
     """
@@ -1177,7 +1177,7 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
         >>> from transformers import T5Tokenizer, T5ForConditionalGeneration
         >>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
-        >>> model = T5ForConditionalGeneration.from_pretrained('t5-small', return_dict=True)
+        >>> model = T5ForConditionalGeneration.from_pretrained('t5-small')
         >>> input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
         >>> labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2> </s>', return_tensors='pt').input_ids
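One migration caveat worth noting: `ModelOutput` is dict-like, so iterating over it yields keys, and code that tuple-unpacks the raw return value should switch to named attributes or call `.to_tuple()`. A minimal sketch with T5, echoing the docstring above (`t5-small` as in the diff; the input strings are just illustrative):

```python
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

input_ids = tokenizer("translate English to German: Hello.", return_tensors="pt").input_ids
labels = tokenizer("Hallo.", return_tensors="pt").input_ids

outputs = model(input_ids=input_ids, labels=labels)

# Preferred: named attribute access on the returned Seq2SeqLMOutput.
loss, logits = outputs.loss, outputs.logits

# Legacy positional code can convert explicitly instead of passing return_dict=False.
loss2, logits2 = outputs.to_tuple()[:2]
assert torch.equal(logits, logits2)
```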