Commit 24d5ad1d authored by Lysandre, committed by Lysandre Debut

Run the examples in slow

parent 9ddf60b6
...@@ -4,7 +4,7 @@ OpenAI GPT
Overview
~~~~~~~~~~~~~~~~~~~~~
OpenAI GPT model was proposed in `Improving Language Understanding by Generative Pre-Training <https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf>`__
by Alec Radford, Karthik Narasimhan, Tim Salimans and Ilya Sutskever. It's a causal (unidirectional)
transformer pre-trained using language modeling on a large corpus with long range dependencies, the Toronto Book Corpus.
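For reference, a minimal sketch of loading the pre-trained model and tokenizer, following the same pattern as the docstring examples below:

from transformers import OpenAIGPTTokenizer, OpenAIGPTModel
import torch

tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTModel.from_pretrained('openai-gpt')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple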
......
...@@ -88,6 +88,8 @@ TF_WEIGHTS_NAME = "model.ckpt"
CONFIG_NAME = "config.json"
MODEL_CARD_NAME = "modelcard.json"
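# Shape (2 examples, 2 choices, 1 token each); used as dummy input to build TF multiple-choice models (see TFBertForMultipleChoice.dummy_inputs below)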
MULTIPLE_CHOICE_DUMMY_INPUTS = [[[0], [1]], [[0], [1]]]
DUMMY_INPUTS = [[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]]
DUMMY_MASK = [[1, 1, 1, 1, 1], [1, 1, 1, 0, 0], [0, 0, 0, 1, 1]]
......
...@@ -659,8 +659,8 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
from transformers import AlbertTokenizer, AlbertForMaskedLM
import torch
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
model = AlbertForMaskedLM.from_pretrained('albert-base-v2')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids, masked_lm_labels=input_ids)
loss, prediction_scores = outputs[:2]
...@@ -839,16 +839,19 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
# The checkpoint albert-base-v2 is not fine-tuned for question answering. Please see the
# examples/run_squad.py example to see how to fine-tune a model to a question answering task.
from transformers import AlbertTokenizer, AlbertForQuestionAnswering
import torch
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
model = AlbertForQuestionAnswering.from_pretrained('albert-base-v2')
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
input_dict = tokenizer.encode_plus(question, text, return_tensors='pt')
start_scores, end_scores = model(**input_dict)
"""
......
...@@ -687,10 +687,15 @@ class BertModel(BertPreTrainedModel):
Examples::
from transformers import BertModel, BertTokenizer
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
"""
...@@ -873,10 +878,15 @@ class BertForPreTraining(BertPreTrainedModel):
Examples::
from transformers import BertTokenizer, BertForPreTraining
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForPreTraining.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids)
prediction_scores, seq_relationship_scores = outputs[:2]
"""
...@@ -968,10 +978,15 @@ class BertForMaskedLM(BertPreTrainedModel):
Examples::
from transformers import BertTokenizer, BertForMaskedLM
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids, masked_lm_labels=input_ids)
loss, prediction_scores = outputs[:2]
"""
...@@ -1064,10 +1079,15 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
Examples::
from transformers import BertTokenizer, BertForNextSentencePrediction
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids)
seq_relationship_scores = outputs[0]
"""
...@@ -1148,11 +1168,16 @@ class BertForSequenceClassification(BertPreTrainedModel):
Examples::
from transformers import BertTokenizer, BertForSequenceClassification
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
outputs = model(input_ids, labels=labels)
loss, logits = outputs[:2]
"""
...@@ -1240,12 +1265,17 @@ class BertForMultipleChoice(BertPreTrainedModel):
Examples::
from transformers import BertTokenizer, BertForMultipleChoice
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
input_ids = torch.tensor([tokenizer.encode(s, add_special_tokens=True) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
labels = torch.tensor(1).unsqueeze(0)  # Batch size 1
outputs = model(input_ids, labels=labels)
loss, classification_scores = outputs[:2]
"""
...@@ -1333,11 +1363,16 @@ class BertForTokenClassification(BertPreTrainedModel):
Examples::
from transformers import BertTokenizer, BertForTokenClassification
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForTokenClassification.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids, labels=labels)
loss, scores = outputs[:2]
"""
...@@ -1431,15 +1466,21 @@ class BertForQuestionAnswering(BertPreTrainedModel):
Examples::
from transformers import BertTokenizer, BertForQuestionAnswering
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
input_ids = tokenizer.encode(question, text)
token_type_ids = [0 if i <= input_ids.index(102) else 1 for i in range(len(input_ids))]
start_scores, end_scores = model(torch.tensor([input_ids]), token_type_ids=torch.tensor([token_type_ids]))
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
# a nice puppet
assert answer == "a nice puppet"
"""
......
...@@ -313,10 +313,15 @@ class CTRLModel(CTRLPreTrainedModel):
Examples::
from transformers import CTRLTokenizer, CTRLModel
import torch
tokenizer = CTRLTokenizer.from_pretrained('ctrl')
model = CTRLModel.from_pretrained('ctrl')
input_ids = torch.tensor(tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
"""
......
...@@ -437,10 +437,15 @@ class DistilBertModel(DistilBertPreTrainedModel):
Examples::
from transformers import DistilBertTokenizer, DistilBertModel
import torch
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertModel.from_pretrained('distilbert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
"""
...@@ -536,6 +541,9 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
Examples::
from transformers import DistilBertTokenizer, DistilBertForMaskedLM
import torch
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForMaskedLM.from_pretrained('distilbert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
...@@ -608,6 +616,9 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
Examples::
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import torch
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
...@@ -697,6 +708,9 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
Examples::
from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
import torch
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
...@@ -781,6 +795,9 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel):
Examples::
from transformers import DistilBertTokenizer, DistilBertForTokenClassification
import torch
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForTokenClassification.from_pretrained('distilbert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
......
...@@ -109,6 +109,7 @@ class PreTrainedEncoderDecoder(nn.Module):
Examples::
# For example purposes. Not runnable.
model = PreTrainedEncoderDecoder.from_pretrained('bert-base-uncased', 'bert-base-uncased')  # initialize Bert2Bert
"""
......
...@@ -385,6 +385,9 @@ class GPT2Model(GPT2PreTrainedModel):
Examples::
from transformers import GPT2Tokenizer, GPT2Model
import torch
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
......
...@@ -169,6 +169,8 @@ class MMBTModel(nn.Module):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
Examples::
# For example purposes. Not runnable.
transformer = BertModel.from_pretrained('bert-base-uncased')
encoder = ImageEncoder(args)
mmbt = MMBTModel(config, transformer, encoder)
...@@ -351,6 +353,7 @@ class MMBTForClassification(nn.Module):
Examples::
# For example purposes. Not runnable.
transformer = BertModel.from_pretrained('bert-base-uncased')
encoder = ImageEncoder(args)
model = MMBTForClassification(config, transformer, encoder)
......
...@@ -388,6 +388,9 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
Examples::
from transformers import OpenAIGPTTokenizer, OpenAIGPTModel
import torch
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTModel.from_pretrained('openai-gpt')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
...@@ -541,6 +544,9 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
Examples::
from transformers import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
import torch
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
...@@ -650,6 +656,9 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
Examples::
from transformers import OpenAIGPTTokenizer, OpenAIGPTDoubleHeadsModel
import torch
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
tokenizer.add_special_tokens({'cls_token': '[CLS]'})  # Add a [CLS] to the vocabulary (we should train it also!)
......
...@@ -224,6 +224,9 @@ class RobertaForMaskedLM(BertPreTrainedModel):
Examples::
from transformers import RobertaTokenizer, RobertaForMaskedLM
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForMaskedLM.from_pretrained('roberta-base')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
...@@ -332,6 +335,9 @@ class RobertaForSequenceClassification(BertPreTrainedModel):
Examples::
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained('roberta-base')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
...@@ -423,6 +429,9 @@ class RobertaForMultipleChoice(BertPreTrainedModel):
Examples::
from transformers import RobertaTokenizer, RobertaForMultipleChoice
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForMultipleChoice.from_pretrained('roberta-base')
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
...@@ -515,14 +524,17 @@ class RobertaForTokenClassification(BertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import RobertaTokenizer, RobertaForTokenClassification
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForTokenClassification.from_pretrained('roberta-base')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids, labels=labels)
loss, scores = outputs[:2]
"""
...@@ -637,13 +649,23 @@ class RobertaForQuestionAnswering(BertPreTrainedModel):
heads.
Examples::
# The checkpoint roberta-base is not fine-tuned for question answering. Please see the
# examples/run_squad.py example to see how to fine-tune a model to a question answering task.
from transformers import RobertaTokenizer, RobertaForQuestionAnswering
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForQuestionAnswering.from_pretrained('roberta-base')
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
input_ids = tokenizer.encode(question, text)
start_scores, end_scores = model(torch.tensor([input_ids]))
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
"""
outputs = self.roberta( outputs = self.roberta(
......
...@@ -22,7 +22,7 @@ import numpy as np
import tensorflow as tf
from .configuration_bert import BertConfig
from .file_utils import MULTIPLE_CHOICE_DUMMY_INPUTS, add_start_docstrings, add_start_docstrings_to_callable
from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
...@@ -939,6 +939,15 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel):
1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)
@property
def dummy_inputs(self):
""" Dummy inputs to build the network.
Returns:
tf.Tensor with dummy inputs
"""
return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)}
@add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING)
def call(
self,
......
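The multiple-choice-shaped dummy input matters because TF models create their weights by being called once on their dummy inputs before the pretrained weights are loaded, so the extra num_choices dimension lets the classification head build correctly. A minimal sketch of that behaviour (assuming a TF checkpoint is available for 'bert-base-uncased', as in the examples above):

from transformers import TFBertForMultipleChoice

# from_pretrained builds the Keras weights by running the model once on its dummy_inputs
model = TFBertForMultipleChoice.from_pretrained('bert-base-uncased')
print(model.dummy_inputs["input_ids"].shape)  # (2, 2, 1): 2 examples, 2 choices, 1 token each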
...@@ -530,14 +530,14 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel):
Examples::
import tensorflow as tf
from transformers import CTRLTokenizer, TFCTRLLMHeadModel
tokenizer = CTRLTokenizer.from_pretrained('ctrl')
model = TFCTRLLMHeadModel.from_pretrained('ctrl')
input_ids = tf.constant([tokenizer.encode("Links Hello, my dog is cute", add_special_tokens=True)])  # Batch size 1
outputs = model(input_ids)
logits = outputs[0]
"""
......
...@@ -699,7 +699,7 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel):
Examples::
import tensorflow as tf
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
...@@ -755,10 +755,12 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
Examples::
import tensorflow as tf
from transformers import DistilBertTokenizer, TFDistilBertForTokenClassification
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = TFDistilBertForTokenClassification.from_pretrained('distilbert-base-uncased')
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
outputs = model(input_ids)
scores = outputs[0]
...@@ -814,7 +816,7 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel):
Examples::
import tensorflow as tf
from transformers import DistilBertTokenizer, TFDistilBertForQuestionAnswering
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = TFDistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased')
......
...@@ -609,6 +609,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
Examples::
# For example purposes. Not runnable.
import tensorflow as tf
from transformers import GPT2Tokenizer, TFGPT2DoubleHeadsModel
......
...@@ -582,6 +582,7 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
Examples::
# For example purposes. Not runnable.
import tensorflow as tf
from transformers import OpenAIGPTTokenizer, TFOpenAIGPTDoubleHeadsModel
......
...@@ -293,7 +293,7 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel):
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = TFRobertaForMaskedLM.from_pretrained('roberta-base')
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :]  # Batch size 1
outputs = model(input_ids)
prediction_scores = outputs[0]
"""
...@@ -368,7 +368,7 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel):
import tensorflow as tf
from transformers import RobertaTokenizer, TFRobertaForSequenceClassification
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = TFRobertaForSequenceClassification.from_pretrained('roberta-base')
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :]  # Batch size 1
labels = tf.constant([1])[None, :]  # Batch size 1
......
...@@ -248,6 +248,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
Examples::
# For example purposes. Not runnable.
model = BertModel.from_pretrained('bert-base-uncased')  # Download model and configuration from S3 and cache.
model = BertModel.from_pretrained('./test/saved_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
model = BertModel.from_pretrained('bert-base-uncased', output_attentions=True)  # Update configuration during loading
......
...@@ -863,6 +863,7 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel):
Examples::
import tensorflow as tf
import numpy as np
from transformers import XLNetTokenizer, TFXLNetLMHeadModel
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
...@@ -870,11 +871,11 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel):
# We show how to set up inputs to predict a next token using a bi-directional context.
input_ids = tf.constant(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True))[None, :]  # We will predict the masked token
perm_mask = np.zeros((1, input_ids.shape[1], input_ids.shape[1]))
perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
target_mapping = np.zeros((1, 1, input_ids.shape[1]))  # Shape [1, 1, seq_length] => let's predict one token
target_mapping[0, 0, -1] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
outputs = model(input_ids, perm_mask=tf.constant(perm_mask, dtype=tf.float32), target_mapping=tf.constant(target_mapping, dtype=tf.float32))
next_token_logits = outputs[0]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
...@@ -995,7 +996,7 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel):
from transformers import XLNetTokenizer, TFXLNetForTokenClassification
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = TFXLNetForTokenClassification.from_pretrained('xlnet-large-cased')
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
outputs = model(input_ids)
scores = outputs[0]
...@@ -1115,6 +1116,7 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel):
# Examples::
# # For example purposes. Not runnable.
# tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
# model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
# input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :]  # Batch size 1
......
...@@ -694,6 +694,9 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
Examples::
from transformers import TransfoXLTokenizer, TransfoXLModel
import torch
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
...@@ -883,6 +886,9 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
Examples::
from transformers import TransfoXLTokenizer, TransfoXLLMHeadModel
import torch
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
......