Unverified commit 1073a2bd authored by Sylvain Gugger, committed by GitHub

Switch `return_dict` to `True` by default. (#8530)

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Run on the real suite

* Fix slow tests
parent 0d0a0785
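With this change, a model's forward pass returns a `ModelOutput` with named fields by default, so the examples touched below no longer need to pass `return_dict=True`. A minimal sketch of the new default behaviour (the `distilbert-base-uncased` checkpoint is only illustrative); the old tuple outputs stay available per call via `return_dict=False` or via `.to_tuple()`:

```python
from transformers import AutoTokenizer, AutoModelForMaskedLM

# Illustrative checkpoint; any masked-LM checkpoint would do.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("distilbert-base-uncased")

inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")
labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]

# New default: a ModelOutput with named attributes, no return_dict=True needed.
outputs = model(**inputs, labels=labels)
print(outputs.loss, outputs.logits.shape)

# The old tuple behaviour is still available, either per call ...
loss, logits = model(**inputs, labels=labels, return_dict=False)[:2]
# ... or by converting an existing ModelOutput back to a plain tuple.
loss, logits = outputs.to_tuple()[:2]
```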
......@@ -40,7 +40,7 @@ Usage:
labels = tokenizer('This is a short summary', return_tensors="pt").input_ids
# train...
loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels, return_dict=True).loss
loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
loss.backward()
......
......@@ -64,7 +64,7 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash
input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='pt').input_ids
# the forward function automatically creates the correct decoder_input_ids
loss = model(input_ids=input_ids, labels=labels, return_dict=True).loss
loss = model(input_ids=input_ids, labels=labels).loss
- Supervised training
......@@ -77,7 +77,7 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash
input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids
labels = tokenizer('Das Haus ist wunderbar.', return_tensors='pt').input_ids
# the forward function automatically creates the correct decoder_input_ids
loss = model(input_ids=input_ids, labels=labels, return_dict=True).loss
loss = model(input_ids=input_ids, labels=labels).loss
T5Config
......
......@@ -89,7 +89,7 @@ each other. The process is the following:
>>> import torch
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
>>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=True)
>>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")
>>> classes = ["not paraphrase", "is paraphrase"]
......@@ -122,7 +122,7 @@ each other. The process is the following:
>>> import tensorflow as tf
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
>>> model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=True)
>>> model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")
>>> classes = ["not paraphrase", "is paraphrase"]
......@@ -211,7 +211,7 @@ Here is an example of question answering using a model and a tokenizer. The proc
>>> import torch
>>> tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
>>> model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad", return_dict=True)
>>> model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
>>> text = r"""
... 🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
......@@ -253,7 +253,7 @@ Here is an example of question answering using a model and a tokenizer. The proc
>>> import tensorflow as tf
>>> tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad", return_dict=True)
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
>>> text = r"""
... 🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
......@@ -373,7 +373,7 @@ Here is an example of doing masked language modeling using a model and a tokeniz
>>> import torch
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
>>> model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased", return_dict=True)
>>> model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased")
>>> sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."
......@@ -389,7 +389,7 @@ Here is an example of doing masked language modeling using a model and a tokeniz
>>> import tensorflow as tf
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
>>> model = TFAutoModelWithLMHead.from_pretrained("distilbert-base-cased", return_dict=True)
>>> model = TFAutoModelWithLMHead.from_pretrained("distilbert-base-cased")
>>> sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."
......@@ -437,7 +437,7 @@ of tokens.
>>> from torch.nn import functional as F
>>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
>>> model = AutoModelWithLMHead.from_pretrained("gpt2", return_dict=True)
>>> model = AutoModelWithLMHead.from_pretrained("gpt2")
>>> sequence = f"Hugging Face is based in DUMBO, New York City, and "
......@@ -461,7 +461,7 @@ of tokens.
>>> import tensorflow as tf
>>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
>>> model = TFAutoModelWithLMHead.from_pretrained("gpt2", return_dict=True)
>>> model = TFAutoModelWithLMHead.from_pretrained("gpt2")
>>> sequence = f"Hugging Face is based in DUMBO, New York City, and "
......@@ -520,7 +520,7 @@ Here is an example of text generation using ``XLNet`` and its tokenizer.
>>> ## PYTORCH CODE
>>> from transformers import AutoModelWithLMHead, AutoTokenizer
>>> model = AutoModelWithLMHead.from_pretrained("xlnet-base-cased", return_dict=True)
>>> model = AutoModelWithLMHead.from_pretrained("xlnet-base-cased")
>>> tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
>>> # Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
......@@ -545,7 +545,7 @@ Here is an example of text generation using ``XLNet`` and its tokenizer.
>>> ## TENSORFLOW CODE
>>> from transformers import TFAutoModelWithLMHead, AutoTokenizer
>>> model = TFAutoModelWithLMHead.from_pretrained("xlnet-base-cased", return_dict=True)
>>> model = TFAutoModelWithLMHead.from_pretrained("xlnet-base-cased")
>>> tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
>>> # Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
......@@ -664,7 +664,7 @@ Here is an example of doing named entity recognition, using a model and a tokeni
>>> from transformers import AutoModelForTokenClassification, AutoTokenizer
>>> import torch
>>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english", return_dict=True)
>>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
>>> label_list = [
......@@ -692,7 +692,7 @@ Here is an example of doing named entity recognition, using a model and a tokeni
>>> from transformers import TFAutoModelForTokenClassification, AutoTokenizer
>>> import tensorflow as tf
>>> model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english", return_dict=True)
>>> model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
>>> label_list = [
......@@ -790,7 +790,7 @@ CNN / Daily Mail), it yields very good results.
>>> ## PYTORCH CODE
>>> from transformers import AutoModelWithLMHead, AutoTokenizer
>>> model = AutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
>>> model = AutoModelWithLMHead.from_pretrained("t5-base")
>>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
>>> # T5 uses a max_length of 512 so we cut the article to 512 tokens.
......@@ -799,7 +799,7 @@ CNN / Daily Mail), it yields very good results.
>>> ## TENSORFLOW CODE
>>> from transformers import TFAutoModelWithLMHead, AutoTokenizer
>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base")
>>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
>>> # T5 uses a max_length of 512 so we cut the article to 512 tokens.
......@@ -843,7 +843,7 @@ Here is an example of doing translation using a model and a tokenizer. The proce
>>> ## PYTORCH CODE
>>> from transformers import AutoModelWithLMHead, AutoTokenizer
>>> model = AutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
>>> model = AutoModelWithLMHead.from_pretrained("t5-base")
>>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
>>> inputs = tokenizer.encode("translate English to German: Hugging Face is a technology company based in New York and Paris", return_tensors="pt")
......@@ -851,7 +851,7 @@ Here is an example of doing translation using a model and a tokenizer. The proce
>>> ## TENSORFLOW CODE
>>> from transformers import TFAutoModelWithLMHead, AutoTokenizer
>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base")
>>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
>>> inputs = tokenizer.encode("translate English to German: Hugging Face is a technology company based in New York and Paris", return_tensors="tf")
......
......@@ -39,7 +39,7 @@ head on top of the encoder with an output size of 2. Models are initialized in `
.. code-block:: python
from transformers import BertForSequenceClassification
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True)
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.train()
This is useful because it allows us to make use of the pre-trained BERT encoder and easily train it on whatever
......
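To make the "easily train it" remark in the hunk above concrete, here is a hedged sketch of a single fine-tuning step with the sequence-classification head; the toy batch, labels, and learning rate are placeholders:

```python
import torch
from transformers import BertForSequenceClassification, BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.train()

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# Toy batch purely for illustration.
batch = tokenizer(["I loved it", "Terrible movie"], padding=True, return_tensors="pt")
labels = torch.tensor([1, 0])

outputs = model(**batch, labels=labels)  # a ModelOutput by default now
outputs.loss.backward()
optimizer.step()
optimizer.zero_grad()
```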
......@@ -210,7 +210,6 @@
" visual_feats=features,\n",
" visual_pos=normalized_boxes,\n",
" token_type_ids=inputs.token_type_ids,\n",
" return_dict=True,\n",
" output_attentions=False,\n",
" )\n",
" output_vqa = lxmert_vqa(\n",
......@@ -219,7 +218,6 @@
" visual_feats=features,\n",
" visual_pos=normalized_boxes,\n",
" token_type_ids=inputs.token_type_ids,\n",
" return_dict=True,\n",
" output_attentions=False,\n",
" )\n",
" # get prediction\n",
......@@ -266,4 +264,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
\ No newline at end of file
......@@ -321,7 +321,7 @@ def evaluate(args, model, tokenizer, prefix=""):
eval_feature = features[feature_index.item()]
unique_id = int(eval_feature.unique_id)
output = [to_list(output[i]) for output in outputs]
output = [to_list(output[i]) for output in outputs.to_tuple()]
# Some models (XLNet, XLM) use 5 arguments for their predictions, while the other "simpler"
# models only use two.
......
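Code that reads the outputs positionally, like the evaluation loop above, can keep its indexing by converting the `ModelOutput` back to a tuple first. A hedged sketch of that pattern (the SQuAD checkpoint here is only an example):

```python
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased-distilled-squad")
model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-cased-distilled-squad")

inputs = tokenizer("Who wrote it?", "It was written by Jane.", return_tensors="pt")
outputs = model(**inputs)

# Named access on the ModelOutput ...
start_logits, end_logits = outputs.start_logits, outputs.end_logits

# ... while positional code can call .to_tuple() and keep indexing as before,
# which is what the change to run_squad.py above relies on.
per_example = [tensor[0].tolist() for tensor in outputs.to_tuple()]
```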
......@@ -95,7 +95,7 @@ def evaluate_batch_retrieval(args, rag_model, questions):
truncation=True,
)["input_ids"].to(args.device)
question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids, return_dict=True)
question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids)
question_enc_pool_output = question_enc_outputs.pooler_output
result = rag_model.retriever(
......
......@@ -204,7 +204,6 @@ class GenerativeQAModule(BaseTransformer):
decoder_input_ids=decoder_input_ids,
use_cache=False,
labels=lm_labels,
return_dict=True,
**rag_kwargs,
)
......
......@@ -47,7 +47,7 @@ def embed(documents: dict, ctx_encoder: DPRContextEncoder, ctx_tokenizer: DPRCon
input_ids = ctx_tokenizer(
documents["title"], documents["text"], truncation=True, padding="longest", return_tensors="pt"
)["input_ids"]
embeddings = ctx_encoder(input_ids.to(device=device), return_dict=True).pooler_output
embeddings = ctx_encoder(input_ids.to(device=device)).pooler_output
return {"embeddings": embeddings.detach().cpu().numpy()}
......
......@@ -153,7 +153,6 @@ class SummarizationDistiller(SummarizationModule):
output_hidden_states=self.do_calc_hidden_loss,
output_attentions=False,
use_cache=False,
return_dict=True,
)
lm_logits = student_outputs.logits
......@@ -179,7 +178,6 @@ class SummarizationDistiller(SummarizationModule):
input_ids,
attention_mask=src_mask,
output_hidden_states=self.do_calc_hidden_loss,
return_dict=True,
)
if self.different_base_models:
teacher_enc_outputs = all_teacher_encoder_outputs.last_hidden_state
......@@ -199,7 +197,6 @@ class SummarizationDistiller(SummarizationModule):
decoder_input_ids=decoder_input_ids,
output_hidden_states=self.do_calc_hidden_loss,
use_cache=False, # since we are not passing labels, never let this default to True
return_dict=True,
)
dec_mask = decoder_input_ids.ne(pad_token_id)
loss_ce = self.calc_ce_loss(dec_mask, lm_logits, teacher_outputs.logits)
......
......@@ -185,7 +185,7 @@ class TestSummarizationDistiller(TestCasePlus):
@require_torch_non_multi_gpu_but_fix_me
def test_loss_fn(self):
model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True)
model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY)
input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"]
target_ids = torch.tensor([[0, 4, 8, 2], [0, 8, 2, 1]], dtype=torch.long, device=model.device)
decoder_input_ids = target_ids[:, :-1].contiguous() # Why this line?
......
......@@ -23,7 +23,7 @@ target_str = "us rejects charges against its ambassador in bolivia"
input_ids = tokenizer(input_str, return_tensors="pt").input_ids
labels = tokenizer(target_str, return_tensors="pt").input_ids
loss = model(input_ids, labels=labels, return_dict=True).loss
loss = model(input_ids, labels=labels).loss
```
### Citation
......
......@@ -26,7 +26,7 @@ target_str = "us rejects charges against its ambassador in bolivia"
input_ids = tokenizer(input_str, return_tensors="pt").input_ids
labels = tokenizer(target_str, return_tensors="pt").input_ids
loss = model(input_ids, labels=labels, return_dict=True).loss
loss = model(input_ids, labels=labels).loss
```
Note that since this model is a multi-lingual model it can be fine-tuned on all kinds of other languages.
......
......@@ -45,7 +45,7 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np
tokenizer = AutoTokenizer.from_pretrained('mrm8488/codebert-base-finetuned-detect-insecure-code')
model = AutoModelForSequenceClassification.from_pretrained('mrm8488/codebert-base-finetuned-detect-insecure-code', return_dict=True)
model = AutoModelForSequenceClassification.from_pretrained('mrm8488/codebert-base-finetuned-detect-insecure-code')
inputs = tokenizer("your code here", return_tensors="pt", truncation=True, padding='max_length')
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
......
......@@ -13,7 +13,7 @@ sentences = ["Hello World", "Hallo Welt"]
encoded_input = tokenizer(sentences, padding=True, truncation=True, max_length=64, return_tensors='pt')
with torch.no_grad():
model_output = model(**encoded_input, return_dict=True)
model_output = model(**encoded_input)
embeddings = model_output.pooler_output
embeddings = torch.nn.functional.normalize(embeddings)
......
......@@ -59,7 +59,7 @@ print(f"num of params {tiny_model.num_parameters()}")
# Test
batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
outputs = tiny_model(**batch, return_dict=True)
outputs = tiny_model(**batch)
print("test output:", len(outputs.logits[0]))
......
......@@ -30,7 +30,7 @@ print(f"num of params {tiny_model.num_parameters()}")
# Test
batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
outputs = tiny_model(**batch, return_dict=True)
outputs = tiny_model(**batch)
print("test output:", len(outputs.logits[0]))
......
......@@ -55,7 +55,7 @@ class PretrainedConfig(object):
Whether or not the model should return all attentions.
use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
Whether or not the model should return the last key/values attentions (not used by all models).
return_dict (:obj:`bool`, `optional`, defaults to :obj:`False`):
return_dict (:obj:`bool`, `optional`, defaults to :obj:`True`):
Whether or not the model should return a :class:`~transformers.file_utils.ModelOutput` instead of a plain
tuple.
is_encoder_decoder (:obj:`bool`, `optional`, defaults to :obj:`False`):
......@@ -163,7 +163,7 @@ class PretrainedConfig(object):
def __init__(self, **kwargs):
# Attributes with defaults
self.return_dict = kwargs.pop("return_dict", False)
self.return_dict = kwargs.pop("return_dict", True)
self.output_hidden_states = kwargs.pop("output_hidden_states", False)
self.output_attentions = kwargs.pop("output_attentions", False)
self.use_cache = kwargs.pop("use_cache", True) # Not used by all models
......
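Because the default now lives on `PretrainedConfig`, tuple outputs can also be restored once per model instead of on every call. A minimal sketch, using `bert-base-uncased` purely for illustration:

```python
from transformers import BertConfig, BertModel

# Opt out at load time: every forward call of this model returns tuples again.
model = BertModel.from_pretrained("bert-base-uncased", return_dict=False)

# Or flip the flag on a config and pass it explicitly.
config = BertConfig.from_pretrained("bert-base-uncased", return_dict=False)
model = BertModel.from_pretrained("bert-base-uncased", config=config)
```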
......@@ -559,7 +559,7 @@ PT_TOKEN_CLASSIFICATION_SAMPLE = r"""
>>> import torch
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1
......@@ -576,7 +576,7 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
>>> import torch
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
>>> inputs = tokenizer(question, text, return_tensors='pt')
......@@ -596,7 +596,7 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
>>> import torch
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
......@@ -612,7 +612,7 @@ PT_MASKED_LM_SAMPLE = r"""
>>> import torch
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")
>>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
......@@ -629,7 +629,7 @@ PT_BASE_MODEL_SAMPLE = r"""
>>> import torch
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)
......@@ -644,7 +644,7 @@ PT_MULTIPLE_CHOICE_SAMPLE = r"""
>>> import torch
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> choice0 = "It is eaten with a fork and a knife."
......@@ -666,7 +666,7 @@ PT_CAUSAL_LM_SAMPLE = r"""
>>> from transformers import {tokenizer_class}, {model_class}
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs, labels=inputs["input_ids"])
......@@ -681,7 +681,7 @@ TF_TOKEN_CLASSIFICATION_SAMPLE = r"""
>>> import tensorflow as tf
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
>>> input_ids = inputs["input_ids"]
......@@ -699,7 +699,7 @@ TF_QUESTION_ANSWERING_SAMPLE = r"""
>>> import tensorflow as tf
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
>>> input_dict = tokenizer(question, text, return_tensors='tf')
......@@ -718,7 +718,7 @@ TF_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
>>> import tensorflow as tf
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
>>> inputs["labels"] = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
......@@ -735,7 +735,7 @@ TF_MASKED_LM_SAMPLE = r"""
>>> import tensorflow as tf
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="tf")
>>> inputs["labels"] = tokenizer("The capital of France is Paris.", return_tensors="tf")["input_ids"]
......@@ -752,7 +752,7 @@ TF_BASE_MODEL_SAMPLE = r"""
>>> import tensorflow as tf
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
>>> outputs = model(inputs)
......@@ -767,7 +767,7 @@ TF_MULTIPLE_CHOICE_SAMPLE = r"""
>>> import tensorflow as tf
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> choice0 = "It is eaten with a fork and a knife."
......@@ -788,7 +788,7 @@ TF_CAUSAL_LM_SAMPLE = r"""
>>> import tensorflow as tf
>>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
>>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
>>> model = {model_class}.from_pretrained('{checkpoint}')
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
>>> outputs = model(inputs)
......
......@@ -416,7 +416,7 @@ class AlbertTransformer(nn.Module):
head_mask=None,
output_attentions=False,
output_hidden_states=False,
return_dict=False,
return_dict=True,
):
hidden_states = self.embedding_hidden_mapping_in(hidden_states)
......@@ -764,7 +764,7 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
>>> import torch
>>> tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
>>> model = AlbertForPreTraining.from_pretrained('albert-base-v2', return_dict=True)
>>> model = AlbertForPreTraining.from_pretrained('albert-base-v2')
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> outputs = model(input_ids)
......