Unverified Commit 1073a2bd authored by Sylvain Gugger, committed by GitHub

Switch `return_dict` to `True` by default. (#8530)

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Run on the real suite

* Fix slow tests
parent 0d0a0785
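
The practical effect of this default flip: a model's forward pass now returns a ModelOutput with named fields instead of a plain tuple, and callers who want the old tuple behavior must ask for it explicitly. A minimal sketch of the before/after from a user's point of view, using the BERT checkpoint that also appears in the docstrings touched below (the example sentence is purely illustrative):

>>> from transformers import BertTokenizer, BertForPreTraining

>>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
>>> model = BertForPreTraining.from_pretrained('bert-base-uncased')  # return_dict=True is now the default
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

>>> outputs = model(**inputs)  # a BertForPreTrainingOutput, not a plain tuple
>>> prediction_logits = outputs.prediction_logits  # fields are accessed by name
>>> seq_relationship_logits = outputs.seq_relationship_logits

>>> legacy_outputs = model(**inputs, return_dict=False)  # opt back into the old tuple output
>>> prediction_logits = legacy_outputs[0]

Code that relied on tuple unpacking of model outputs can keep passing return_dict=False (per call or in the model config) to restore the previous behavior; everything else can switch to attribute access as in the updated docstrings.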
@@ -329,7 +329,7 @@ class BartEncoder(nn.Module):
  self.layer_norm = LayerNorm(config.d_model) if config.add_final_layer_norm else None
  def forward(
- self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=False
+ self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=True
  ):
  """
  Args:
@@ -528,7 +528,7 @@ class BartDecoder(nn.Module):
  use_cache=False,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  **unused,
  ):
  """
...
@@ -446,7 +446,7 @@ class BertEncoder(nn.Module):
  encoder_attention_mask=None,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  ):
  all_hidden_states = () if output_hidden_states else None
  all_self_attentions = () if output_attentions else None
@@ -920,7 +920,7 @@ class BertForPreTraining(BertPreTrainedModel):
  >>> import torch
  >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = BertForPreTraining.from_pretrained('bert-base-uncased', return_dict=True)
+ >>> model = BertForPreTraining.from_pretrained('bert-base-uncased')
  >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
  >>> outputs = model(**inputs)
@@ -1036,7 +1036,7 @@ class BertLMHeadModel(BertPreTrainedModel):
  >>> tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
  >>> config = BertConfig.from_pretrained("bert-base-cased")
  >>> config.is_decoder = True
- >>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config, return_dict=True)
+ >>> model = BertLMHeadModel.from_pretrained('bert-base-cased', config=config)
  >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
  >>> outputs = model(**inputs)
@@ -1250,7 +1250,7 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
  >>> import torch
  >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased', return_dict=True)
+ >>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
  >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
  >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
...
@@ -463,7 +463,7 @@ class BertGenerationDecoder(BertGenerationPreTrainedModel):
  >>> tokenizer = BertGenerationTokenizer.from_pretrained('google/bert_for_seq_generation_L-24_bbc_encoder')
  >>> config = BertGenerationConfig.from_pretrained("google/bert_for_seq_generation_L-24_bbc_encoder")
  >>> config.is_decoder = True
- >>> model = BertGenerationDecoder.from_pretrained('google/bert_for_seq_generation_L-24_bbc_encoder', config=config, return_dict=True)
+ >>> model = BertGenerationDecoder.from_pretrained('google/bert_for_seq_generation_L-24_bbc_encoder', config=config)
  >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
  >>> outputs = model(**inputs)
...
@@ -384,7 +384,7 @@ class DebertaEncoder(nn.Module):
  output_attentions=False,
  query_states=None,
  relative_pos=None,
- return_dict=False,
+ return_dict=True,
  ):
  attention_mask = self.get_attention_mask(attention_mask)
  relative_pos = self.get_rel_pos(hidden_states, query_states, relative_pos)
...
@@ -885,7 +885,7 @@ class DistilBertForMultipleChoice(DistilBertPreTrainedModel):
  >>> import torch
  >>> tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
- >>> model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-cased', return_dict=True)
+ >>> model = DistilBertForMultipleChoice.from_pretrained('distilbert-base-cased')
  >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
  >>> choice0 = "It is eaten with a fork and a knife."
...
@@ -455,7 +455,7 @@ class DPRContextEncoder(DPRPretrainedContextEncoder):
  >>> from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
  >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
- >>> model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base', return_dict=True)
+ >>> model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
  >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
  >>> embeddings = model(input_ids).pooler_output
  """
@@ -533,7 +533,7 @@ class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
  >>> from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
  >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
- >>> model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base', return_dict=True)
+ >>> model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
  >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
  >>> embeddings = model(input_ids).pooler_output
  """
@@ -609,7 +609,7 @@ class DPRReader(DPRPretrainedReader):
  >>> from transformers import DPRReader, DPRReaderTokenizer
  >>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
- >>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base', return_dict=True)
+ >>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base')
  >>> encoded_inputs = tokenizer(
  ... questions=["What is love ?"],
  ... titles=["Haddaway"],
...
@@ -442,7 +442,7 @@ class ElectraEncoder(nn.Module):
  encoder_attention_mask=None,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  ):
  all_hidden_states = () if output_hidden_states else None
  all_self_attentions = () if output_attentions else None
...
@@ -370,7 +370,7 @@ class EncoderDecoderModel(PreTrainedModel):
  >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
  >>> # training
- >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids, return_dict=True)
+ >>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
  >>> loss, logits = outputs.loss, outputs.logits
  >>> # save and load from pretrained
...
@@ -434,7 +434,7 @@ class FSMTEncoder(nn.Module):
  ) # type: List[EncoderLayer]
  def forward(
- self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=False
+ self, input_ids, attention_mask=None, output_attentions=False, output_hidden_states=False, return_dict=True
  ):
  """
  Args:
@@ -617,7 +617,7 @@ class FSMTDecoder(nn.Module):
  use_cache=False,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  **unused,
  ):
  """
...
@@ -619,7 +619,7 @@ class FunnelEncoder(nn.Module):
  token_type_ids=None,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  ):
  # The pooling is not implemented on long tensors, so we convert this mask.
  attention_mask = attention_mask.type_as(inputs_embeds)
@@ -698,7 +698,7 @@ class FunnelDecoder(nn.Module):
  token_type_ids=None,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  ):
  upsampled_hidden = upsample(
  final_hidden,
@@ -1111,7 +1111,7 @@ class FunnelForPreTraining(FunnelPreTrainedModel):
  >>> import torch
  >>> tokenizer = FunnelTokenizer.from_pretrained('funnel-transformer/small')
- >>> model = FunnelForPreTraining.from_pretrained('funnel-transformer/small', return_dict=True)
+ >>> model = FunnelForPreTraining.from_pretrained('funnel-transformer/small')
  >>> inputs = tokenizer("Hello, my dog is cute", return_tensors= "pt")
  >>> logits = model(**inputs).logits
...
@@ -911,7 +911,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
  >>> from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel
  >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
- >>> model = GPT2DoubleHeadsModel.from_pretrained('gpt2, return_dict=True)
+ >>> model = GPT2DoubleHeadsModel.from_pretrained('gpt2')
  >>> # Add a [CLS] to the vocabulary (we should train it also!)
  >>> num_added_tokens = tokenizer.add_special_tokens({'cls_token': '[CLS]'})
...
@@ -376,7 +376,7 @@ class LayoutLMEncoder(nn.Module):
  encoder_attention_mask=None,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  ):
  all_hidden_states = () if output_hidden_states else None
  all_self_attentions = () if output_attentions else None
...
@@ -1050,7 +1050,7 @@ class LongformerEncoder(nn.Module):
  attention_mask=None,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  ):
  is_index_masked = attention_mask < 0
@@ -1388,7 +1388,7 @@ class LongformerModel(LongformerPreTrainedModel):
  >>> import torch
  >>> from transformers import LongformerModel, LongformerTokenizer
- >>> model = LongformerModel.from_pretrained('allenai/longformer-base-4096', return_dict=True)
+ >>> model = LongformerModel.from_pretrained('allenai/longformer-base-4096')
  >>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
  >>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
@@ -1526,7 +1526,7 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
  >>> import torch
  >>> from transformers import LongformerForMaskedLM, LongformerTokenizer
- >>> model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096', return_dict=True)
+ >>> model = LongformerForMaskedLM.from_pretrained('allenai/longformer-base-4096')
  >>> tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
  >>> SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000) # long input document
@@ -1742,7 +1742,7 @@ class LongformerForQuestionAnswering(LongformerPreTrainedModel):
  >>> import torch
  >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
- >>> model = LongformerForQuestionAnswering.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa", return_dict=True)
+ >>> model = LongformerForQuestionAnswering.from_pretrained("allenai/longformer-large-4096-finetuned-triviaqa")
  >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
  >>> encoding = tokenizer(question, text, return_tensors="pt")
...
@@ -558,7 +558,7 @@ class MobileBertEncoder(nn.Module):
  encoder_attention_mask=None,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  ):
  all_hidden_states = () if output_hidden_states else None
  all_attentions = () if output_attentions else None
@@ -1006,7 +1006,7 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
  >>> import torch
  >>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
- >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased", return_dict=True)
+ >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
  >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
  >>> outputs = model(input_ids)
@@ -1216,7 +1216,7 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel):
  >>> import torch
  >>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
- >>> model = MobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased', return_dict=True)
+ >>> model = MobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased')
  >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
  >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
...
@@ -670,7 +670,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
  >>> import torch
  >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
- >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt', return_dict=True)
+ >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
  >>> tokenizer.add_special_tokens({'cls_token': '[CLS]'}) # Add a [CLS] to the vocabulary (we should train it also!)
  >>> model.resize_token_embeddings(len(tokenizer))
...
@@ -1180,7 +1180,7 @@ class ProphetNetEncoder(ProphetNetPreTrainedModel):
  >>> import torch
  >>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
- >>> model = ProphetNetEncoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone', return_dict=True)
+ >>> model = ProphetNetEncoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone')
  >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
  >>> outputs = model(**inputs)
@@ -1319,7 +1319,7 @@ class ProphetNetDecoder(ProphetNetPreTrainedModel):
  >>> import torch
  >>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
- >>> model = ProphetNetDecoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone', add_cross_attention=False, return_dict=True)
+ >>> model = ProphetNetDecoder.from_pretrained('patrickvonplaten/prophetnet-large-uncased-standalone', add_cross_attention=False)
  >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
  >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
  >>> outputs = model(**inputs)
@@ -1621,7 +1621,7 @@ class ProphetNetModel(ProphetNetPreTrainedModel):
  >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
  >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
- >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+ >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
  >>> last_hidden_states = outputs.last_hidden_state # main stream hidden states
  >>> last_hidden_states_ngram = outputs.last_hidden_state_ngram # predict hidden states
@@ -1731,7 +1731,7 @@ class ProphetNetForConditionalGeneration(ProphetNetPreTrainedModel):
  >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
  >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
- >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+ >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
  >>> logits_next_token = outputs.logits # logits to predict next token as usual
  >>> logits_ngram_next_tokens = outputs.logits_ngram # logits to predict 2nd, 3rd, ... next tokens
@@ -1940,10 +1940,10 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel):
  >>> import torch
  >>> tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased')
- >>> model = ProphetNetForCausalLM.from_pretrained('patrickvonplaten/prophetnet-decoder-clm-large-uncased', return_dict=True)
+ >>> model = ProphetNetForCausalLM.from_pretrained('patrickvonplaten/prophetnet-decoder-clm-large-uncased')
  >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
  >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
- >>> outputs = model(**inputs, return_dict=True)
+ >>> outputs = model(**inputs)
  >>> logits = outputs.logits
@@ -1962,7 +1962,7 @@ class ProphetNetForCausalLM(ProphetNetPreTrainedModel):
  ... )
  >>> input_ids = tokenizer_enc(ARTICLE, return_tensors="pt").input_ids
  >>> labels = tokenizer_dec("us rejects charges against its ambassador in bolivia", return_tensors="pt").input_ids
- >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:], return_dict=True)
+ >>> outputs = model(input_ids=input_ids, decoder_input_ids=labels[:, :-1], labels=labels[:, 1:])
  >>> loss = outputs.loss
  """
...
@@ -558,9 +558,7 @@ class RagModel(RagPreTrainedModel):
  if encoder_outputs is None:
  if has_to_retrieve:
- question_enc_outputs = self.question_encoder(
- input_ids, attention_mask=attention_mask, return_dict=True
- )
+ question_enc_outputs = self.question_encoder(input_ids, attention_mask=attention_mask)
  question_encoder_last_hidden_state = question_enc_outputs[0] # hidden states of question encoder
  retriever_outputs = self.retriever(
@@ -620,7 +618,6 @@ class RagModel(RagPreTrainedModel):
  decoder_attention_mask=decoder_attention_mask,
  past_key_values=past_key_values,
  use_cache=use_cache,
- return_dict=True,
  )
  if not has_to_retrieve:
@@ -1364,7 +1361,7 @@ class RagTokenForGeneration(RagPreTrainedModel):
  batch_size = context_input_ids.shape[0] // n_docs
  encoder = self.rag.generator.get_encoder()
- encoder_outputs = encoder(input_ids=context_input_ids, attention_mask=context_attention_mask, return_dict=True)
+ encoder_outputs = encoder(input_ids=context_input_ids, attention_mask=context_attention_mask)
  input_ids = torch.full(
  (batch_size * num_beams, 1),
...
@@ -390,7 +390,7 @@ class RobertaEncoder(nn.Module):
  encoder_attention_mask=None,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  ):
  all_hidden_states = () if output_hidden_states else None
  all_self_attentions = () if output_attentions else None
@@ -770,7 +770,7 @@ class RobertaForCausalLM(RobertaPreTrainedModel):
  >>> import torch
  >>> tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
- >>> config = RobertaConfig.from_pretrained("roberta-base", return_dict=True)
+ >>> config = RobertaConfig.from_pretrained("roberta-base")
  >>> config.is_decoder = True
  >>> model = RobertaForCausalLM.from_pretrained('roberta-base', config=config)
...
@@ -314,7 +314,7 @@ class SqueezeBertEncoder(nn.Module):
  head_mask=None,
  output_attentions=False,
  output_hidden_states=False,
- return_dict=False,
+ return_dict=True,
  ):
  if head_mask is None:
...
@@ -534,7 +534,7 @@ class T5Block(nn.Module):
  past_key_value=None,
  use_cache=False,
  output_attentions=False,
- return_dict=False,
+ return_dict=True,
  ):
  if past_key_value is not None:
@@ -1022,7 +1022,7 @@ class T5Model(T5PreTrainedModel):
  >>> input_ids = tokenizer("Studies have been shown that owning a dog is good for you", return_tensors="pt").input_ids # Batch size 1
  >>> decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
- >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids, return_dict=True)
+ >>> outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
  >>> last_hidden_states = outputs.last_hidden_state
  """
@@ -1177,7 +1177,7 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
  >>> from transformers import T5Tokenizer, T5ForConditionalGeneration
  >>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
- >>> model = T5ForConditionalGeneration.from_pretrained('t5-small', return_dict=True)
+ >>> model = T5ForConditionalGeneration.from_pretrained('t5-small')
  >>> input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
  >>> labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2> </s>', return_tensors='pt').input_ids
...