Unverified Commit 1073a2bd authored by Sylvain Gugger, committed by GitHub

Switch `return_dict` to `True` by default. (#8530)

* Use the CI to identify failing tests

* Remove from all examples and tests

* More default switch

* Fixes

* More test fixes

* More fixes

* Last fixes hopefully

* Run on the real suite

* Fix slow tests
parent 0d0a0785
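
In practice, this change means callers no longer have to pass `return_dict=True` to get a `ModelOutput` with named attributes; it is now the default. A minimal before/after sketch (the checkpoint name is only illustrative):

```python
from transformers import BertForSequenceClassification, BertTokenizer
import torch

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
labels = torch.tensor([1]).unsqueeze(0)  # batch size 1

# Before this PR: named fields had to be requested explicitly.
outputs = model(**inputs, labels=labels, return_dict=True)

# After this PR: a ModelOutput is returned by default, so attribute access just works.
outputs = model(**inputs, labels=labels)
loss, logits = outputs.loss, outputs.logits
```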
@@ -40,7 +40,7 @@ Usage:
 labels = tokenizer('This is a short summary', return_tensors="pt").input_ids
 # train...
-loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels, return_dict=True).loss
+loss = bert2bert(input_ids=input_ids, decoder_input_ids=labels, labels=labels).loss
 loss.backward()
...
@@ -64,7 +64,7 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash
 input_ids = tokenizer('The <extra_id_0> walks in <extra_id_1> park', return_tensors='pt').input_ids
 labels = tokenizer('<extra_id_0> cute dog <extra_id_1> the <extra_id_2>', return_tensors='pt').input_ids
 # the forward function automatically creates the correct decoder_input_ids
-loss = model(input_ids=input_ids, labels=labels, return_dict=True).loss
+loss = model(input_ids=input_ids, labels=labels).loss
 - Supervised training
@@ -77,7 +77,7 @@ token. T5 can be trained / fine-tuned both in a supervised and unsupervised fash
 input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids
 labels = tokenizer('Das Haus ist wunderbar.', return_tensors='pt').input_ids
 # the forward function automatically creates the correct decoder_input_ids
-loss = model(input_ids=input_ids, labels=labels, return_dict=True).loss
+loss = model(input_ids=input_ids, labels=labels).loss
 T5Config
...
@@ -89,7 +89,7 @@ each other. The process is the following:
 >>> import torch
 >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
->>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=True)
+>>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")
 >>> classes = ["not paraphrase", "is paraphrase"]
@@ -122,7 +122,7 @@ each other. The process is the following:
 >>> import tensorflow as tf
 >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
->>> model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=True)
+>>> model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")
 >>> classes = ["not paraphrase", "is paraphrase"]
@@ -211,7 +211,7 @@ Here is an example of question answering using a model and a tokenizer. The proc
 >>> import torch
 >>> tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
->>> model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad", return_dict=True)
+>>> model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
 >>> text = r"""
 ... 🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
@@ -253,7 +253,7 @@ Here is an example of question answering using a model and a tokenizer. The proc
 >>> import tensorflow as tf
 >>> tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
->>> model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad", return_dict=True)
+>>> model = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
 >>> text = r"""
 ... 🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
@@ -373,7 +373,7 @@ Here is an example of doing masked language modeling using a model and a tokeniz
 >>> import torch
 >>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
->>> model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("distilbert-base-cased")
 >>> sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."
@@ -389,7 +389,7 @@ Here is an example of doing masked language modeling using a model and a tokeniz
 >>> import tensorflow as tf
 >>> tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
->>> model = TFAutoModelWithLMHead.from_pretrained("distilbert-base-cased", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("distilbert-base-cased")
 >>> sequence = f"Distilled models are smaller than the models they mimic. Using them instead of the large versions would help {tokenizer.mask_token} our carbon footprint."
@@ -437,7 +437,7 @@ of tokens.
 >>> from torch.nn import functional as F
 >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
->>> model = AutoModelWithLMHead.from_pretrained("gpt2", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("gpt2")
 >>> sequence = f"Hugging Face is based in DUMBO, New York City, and "
@@ -461,7 +461,7 @@ of tokens.
 >>> import tensorflow as tf
 >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
->>> model = TFAutoModelWithLMHead.from_pretrained("gpt2", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("gpt2")
 >>> sequence = f"Hugging Face is based in DUMBO, New York City, and "
@@ -520,7 +520,7 @@ Here is an example of text generation using ``XLNet`` and its tokenizer.
 >>> ## PYTORCH CODE
 >>> from transformers import AutoModelWithLMHead, AutoTokenizer
->>> model = AutoModelWithLMHead.from_pretrained("xlnet-base-cased", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("xlnet-base-cased")
 >>> tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
 >>> # Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
@@ -545,7 +545,7 @@ Here is an example of text generation using ``XLNet`` and its tokenizer.
 >>> ## TENSORFLOW CODE
 >>> from transformers import TFAutoModelWithLMHead, AutoTokenizer
->>> model = TFAutoModelWithLMHead.from_pretrained("xlnet-base-cased", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("xlnet-base-cased")
 >>> tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
 >>> # Padding text helps XLNet with short prompts - proposed by Aman Rusia in https://github.com/rusiaaman/XLNet-gen#methodology
@@ -664,7 +664,7 @@ Here is an example of doing named entity recognition, using a model and a tokeni
 >>> from transformers import AutoModelForTokenClassification, AutoTokenizer
 >>> import torch
->>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english", return_dict=True)
+>>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
 >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
 >>> label_list = [
@@ -692,7 +692,7 @@ Here is an example of doing named entity recognition, using a model and a tokeni
 >>> from transformers import TFAutoModelForTokenClassification, AutoTokenizer
 >>> import tensorflow as tf
->>> model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english", return_dict=True)
+>>> model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
 >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
 >>> label_list = [
@@ -790,7 +790,7 @@ CNN / Daily Mail), it yields very good results.
 >>> ## PYTORCH CODE
 >>> from transformers import AutoModelWithLMHead, AutoTokenizer
->>> model = AutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("t5-base")
 >>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
 >>> # T5 uses a max_length of 512 so we cut the article to 512 tokens.
@@ -799,7 +799,7 @@ CNN / Daily Mail), it yields very good results.
 >>> ## TENSORFLOW CODE
 >>> from transformers import TFAutoModelWithLMHead, AutoTokenizer
->>> model = TFAutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base")
 >>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
 >>> # T5 uses a max_length of 512 so we cut the article to 512 tokens.
@@ -843,7 +843,7 @@ Here is an example of doing translation using a model and a tokenizer. The proce
 >>> ## PYTORCH CODE
 >>> from transformers import AutoModelWithLMHead, AutoTokenizer
->>> model = AutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
+>>> model = AutoModelWithLMHead.from_pretrained("t5-base")
 >>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
 >>> inputs = tokenizer.encode("translate English to German: Hugging Face is a technology company based in New York and Paris", return_tensors="pt")
@@ -851,7 +851,7 @@ Here is an example of doing translation using a model and a tokenizer. The proce
 >>> ## TENSORFLOW CODE
 >>> from transformers import TFAutoModelWithLMHead, AutoTokenizer
->>> model = TFAutoModelWithLMHead.from_pretrained("t5-base", return_dict=True)
+>>> model = TFAutoModelWithLMHead.from_pretrained("t5-base")
 >>> tokenizer = AutoTokenizer.from_pretrained("t5-base")
 >>> inputs = tokenizer.encode("translate English to German: Hugging Face is a technology company based in New York and Paris", return_tensors="tf")
...
@@ -39,7 +39,7 @@ head on top of the encoder with an output size of 2. Models are initialized in `
 .. code-block:: python
 from transformers import BertForSequenceClassification
-model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True)
+model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
 model.train()
 This is useful because it allows us to make use of the pre-trained BERT encoder and easily train it on whatever
...
@@ -210,7 +210,6 @@
 " visual_feats=features,\n",
 " visual_pos=normalized_boxes,\n",
 " token_type_ids=inputs.token_type_ids,\n",
-" return_dict=True,\n",
 " output_attentions=False,\n",
 " )\n",
 " output_vqa = lxmert_vqa(\n",
@@ -219,7 +218,6 @@
 " visual_feats=features,\n",
 " visual_pos=normalized_boxes,\n",
 " token_type_ids=inputs.token_type_ids,\n",
-" return_dict=True,\n",
 " output_attentions=False,\n",
 " )\n",
 " # get prediction\n",
@@ -266,4 +264,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
\ No newline at end of file
+}
@@ -321,7 +321,7 @@ def evaluate(args, model, tokenizer, prefix=""):
 eval_feature = features[feature_index.item()]
 unique_id = int(eval_feature.unique_id)
-output = [to_list(output[i]) for output in outputs]
+output = [to_list(output[i]) for output in outputs.to_tuple()]
 # Some models (XLNet, XLM) use 5 arguments for their predictions, while the other "simpler"
 # models only use two.
...
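
A minimal sketch (checkpoint name is only illustrative) of why the positional code above now needs `to_tuple()`: a `ModelOutput` is dict-like, so iterating it yields field names rather than tensors.

```python
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
import torch

# Illustrative checkpoint; the evaluation script works with whatever model the user passes in.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased-distilled-squad")
model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-cased-distilled-squad")

inputs = tokenizer("Who wrote it?", "It was written by Jane.", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# With return_dict=True (now the default) the result is a ModelOutput with named fields:
start_logits, end_logits = outputs.start_logits, outputs.end_logits

# Code that consumes outputs positionally converts it back to a plain tuple first:
start_logits, end_logits = outputs.to_tuple()
```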
@@ -95,7 +95,7 @@ def evaluate_batch_retrieval(args, rag_model, questions):
 truncation=True,
 )["input_ids"].to(args.device)
-question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids, return_dict=True)
+question_enc_outputs = rag_model.rag.question_encoder(retriever_input_ids)
 question_enc_pool_output = question_enc_outputs.pooler_output
 result = rag_model.retriever(
...
@@ -204,7 +204,6 @@ class GenerativeQAModule(BaseTransformer):
 decoder_input_ids=decoder_input_ids,
 use_cache=False,
 labels=lm_labels,
-return_dict=True,
 **rag_kwargs,
 )
...
@@ -47,7 +47,7 @@ def embed(documents: dict, ctx_encoder: DPRContextEncoder, ctx_tokenizer: DPRCon
 input_ids = ctx_tokenizer(
 documents["title"], documents["text"], truncation=True, padding="longest", return_tensors="pt"
 )["input_ids"]
-embeddings = ctx_encoder(input_ids.to(device=device), return_dict=True).pooler_output
+embeddings = ctx_encoder(input_ids.to(device=device)).pooler_output
 return {"embeddings": embeddings.detach().cpu().numpy()}
...
@@ -153,7 +153,6 @@ class SummarizationDistiller(SummarizationModule):
 output_hidden_states=self.do_calc_hidden_loss,
 output_attentions=False,
 use_cache=False,
-return_dict=True,
 )
 lm_logits = student_outputs.logits
@@ -179,7 +178,6 @@ class SummarizationDistiller(SummarizationModule):
 input_ids,
 attention_mask=src_mask,
 output_hidden_states=self.do_calc_hidden_loss,
-return_dict=True,
 )
 if self.different_base_models:
 teacher_enc_outputs = all_teacher_encoder_outputs.last_hidden_state
@@ -199,7 +197,6 @@ class SummarizationDistiller(SummarizationModule):
 decoder_input_ids=decoder_input_ids,
 output_hidden_states=self.do_calc_hidden_loss,
 use_cache=False, # since we are not passing labels, never let this default to True
-return_dict=True,
 )
 dec_mask = decoder_input_ids.ne(pad_token_id)
 loss_ce = self.calc_ce_loss(dec_mask, lm_logits, teacher_outputs.logits)
...
@@ -185,7 +185,7 @@ class TestSummarizationDistiller(TestCasePlus):
 @require_torch_non_multi_gpu_but_fix_me
 def test_loss_fn(self):
-model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True)
+model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY)
 input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"]
 target_ids = torch.tensor([[0, 4, 8, 2], [0, 8, 2, 1]], dtype=torch.long, device=model.device)
 decoder_input_ids = target_ids[:, :-1].contiguous() # Why this line?
...
@@ -23,7 +23,7 @@ target_str = "us rejects charges against its ambassador in bolivia"
 input_ids = tokenizer(input_str, return_tensors="pt").input_ids
 labels = tokenizer(target_str, return_tensors="pt").input_ids
-loss = model(input_ids, labels=labels, return_dict=True).loss
+loss = model(input_ids, labels=labels).loss
 ```
 ### Citation
...
@@ -26,7 +26,7 @@ target_str = "us rejects charges against its ambassador in bolivia"
 input_ids = tokenizer(input_str, return_tensors="pt").input_ids
 labels = tokenizer(target_str, return_tensors="pt").input_ids
-loss = model(input_ids, labels=labels, return_dict=True).loss
+loss = model(input_ids, labels=labels).loss
 ```
 Note that since this model is a multi-lingual model it can be fine-tuned on all kinds of other languages.
...
@@ -45,7 +45,7 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import numpy as np
 tokenizer = AutoTokenizer.from_pretrained('mrm8488/codebert-base-finetuned-detect-insecure-code')
-model = AutoModelForSequenceClassification.from_pretrained('mrm8488/codebert-base-finetuned-detect-insecure-code', return_dict=True)
+model = AutoModelForSequenceClassification.from_pretrained('mrm8488/codebert-base-finetuned-detect-insecure-code')
 inputs = tokenizer("your code here", return_tensors="pt", truncation=True, padding='max_length')
 labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
...
@@ -13,7 +13,7 @@ sentences = ["Hello World", "Hallo Welt"]
 encoded_input = tokenizer(sentences, padding=True, truncation=True, max_length=64, return_tensors='pt')
 with torch.no_grad():
-model_output = model(**encoded_input, return_dict=True)
+model_output = model(**encoded_input)
 embeddings = model_output.pooler_output
 embeddings = torch.nn.functional.normalize(embeddings)
...
@@ -59,7 +59,7 @@ print(f"num of params {tiny_model.num_parameters()}")
 # Test
 batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
-outputs = tiny_model(**batch, return_dict=True)
+outputs = tiny_model(**batch)
 print("test output:", len(outputs.logits[0]))
...
@@ -30,7 +30,7 @@ print(f"num of params {tiny_model.num_parameters()}")
 # Test
 batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
-outputs = tiny_model(**batch, return_dict=True)
+outputs = tiny_model(**batch)
 print("test output:", len(outputs.logits[0]))
...
@@ -55,7 +55,7 @@ class PretrainedConfig(object):
 Whether or not the model should returns all attentions.
 use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
 Whether or not the model should return the last key/values attentions (not used by all models).
-return_dict (:obj:`bool`, `optional`, defaults to :obj:`False`):
+return_dict (:obj:`bool`, `optional`, defaults to :obj:`True`):
 Whether or not the model should return a :class:`~transformers.file_utils.ModelOutput` instead of a plain
 tuple.
 is_encoder_decoder (:obj:`bool`, `optional`, defaults to :obj:`False`):
@@ -163,7 +163,7 @@ class PretrainedConfig(object):
 def __init__(self, **kwargs):
 # Attributes with defaults
-self.return_dict = kwargs.pop("return_dict", False)
+self.return_dict = kwargs.pop("return_dict", True)
 self.output_hidden_states = kwargs.pop("output_hidden_states", False)
 self.output_attentions = kwargs.pop("output_attentions", False)
 self.use_cache = kwargs.pop("use_cache", True) # Not used by all models
...
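
Since `PretrainedConfig` now defaults `return_dict` to `True`, every model picks up the new behaviour unless the caller opts out. A small sketch of the two escape hatches (checkpoint name is only illustrative): pass `return_dict=False` per call, or set it on the config.

```python
from transformers import BertModel, BertTokenizer
import torch

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")
print(model.config.return_dict)  # True with the new default

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
    # Default: a ModelOutput with named fields.
    outputs = model(**inputs)
    hidden = outputs.last_hidden_state

    # Opt out per call to recover the old plain-tuple behaviour.
    legacy = model(**inputs, return_dict=False)
    hidden = legacy[0]

# Or opt out globally for this model instance.
model.config.return_dict = False
```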
@@ -559,7 +559,7 @@ PT_TOKEN_CLASSIFICATION_SAMPLE = r"""
 >>> import torch
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> labels = torch.tensor([1] * inputs["input_ids"].size(1)).unsqueeze(0) # Batch size 1
@@ -576,7 +576,7 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
 >>> import torch
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
 >>> inputs = tokenizer(question, text, return_tensors='pt')
@@ -596,7 +596,7 @@ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
 >>> import torch
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
@@ -612,7 +612,7 @@ PT_MASKED_LM_SAMPLE = r"""
 >>> import torch
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")
 >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
@@ -629,7 +629,7 @@ PT_BASE_MODEL_SAMPLE = r"""
 >>> import torch
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs)
@@ -644,7 +644,7 @@ PT_MULTIPLE_CHOICE_SAMPLE = r"""
 >>> import torch
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> choice0 = "It is eaten with a fork and a knife."
@@ -666,7 +666,7 @@ PT_CAUSAL_LM_SAMPLE = r"""
 >>> from transformers import {tokenizer_class}, {model_class}
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 >>> outputs = model(**inputs, labels=inputs["input_ids"])
@@ -681,7 +681,7 @@ TF_TOKEN_CLASSIFICATION_SAMPLE = r"""
 >>> import tensorflow as tf
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
 >>> input_ids = inputs["input_ids"]
@@ -699,7 +699,7 @@ TF_QUESTION_ANSWERING_SAMPLE = r"""
 >>> import tensorflow as tf
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
 >>> input_dict = tokenizer(question, text, return_tensors='tf')
@@ -718,7 +718,7 @@ TF_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
 >>> import tensorflow as tf
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
 >>> inputs["labels"] = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
@@ -735,7 +735,7 @@ TF_MASKED_LM_SAMPLE = r"""
 >>> import tensorflow as tf
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="tf")
 >>> inputs["labels"] = tokenizer("The capital of France is Paris.", return_tensors="tf")["input_ids"]
@@ -752,7 +752,7 @@ TF_BASE_MODEL_SAMPLE = r"""
 >>> import tensorflow as tf
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
 >>> outputs = model(inputs)
@@ -767,7 +767,7 @@ TF_MULTIPLE_CHOICE_SAMPLE = r"""
 >>> import tensorflow as tf
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
 >>> choice0 = "It is eaten with a fork and a knife."
@@ -788,7 +788,7 @@ TF_CAUSAL_LM_SAMPLE = r"""
 >>> import tensorflow as tf
 >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
->>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
+>>> model = {model_class}.from_pretrained('{checkpoint}')
 >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
 >>> outputs = model(inputs)
...
@@ -416,7 +416,7 @@ class AlbertTransformer(nn.Module):
 head_mask=None,
 output_attentions=False,
 output_hidden_states=False,
-return_dict=False,
+return_dict=True,
 ):
 hidden_states = self.embedding_hidden_mapping_in(hidden_states)
@@ -764,7 +764,7 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
 >>> import torch
 >>> tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
->>> model = AlbertForPreTraining.from_pretrained('albert-base-v2', return_dict=True)
+>>> model = AlbertForPreTraining.from_pretrained('albert-base-v2')
 >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
 >>> outputs = model(input_ids)
...