Unverified Commit 364a5ae1 authored by Lysandre Debut, committed by GitHub

Refactor Code samples; Test code samples (#5036)



* Refactor code samples

* Test docstrings

* Style

* Tokenization examples

* Run rest of tests

* First step to testing source docs

* Style and BART comment

* Test the remainder of the code samples

* Style

* let to const

* Formatting fixes

* Ready for merge

* Fix fixture + Style

* Fix last tests

* Update docs/source/quicktour.rst
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Addressing @sgugger's comments + Fix MobileBERT in TF
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 315f464b
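The pattern applied throughout the diff below is to delete the hand-written `Examples::` block from each `forward` docstring and instead decorate the method with the new `add_code_sample_docstrings` helper imported from `.file_utils`, parameterized by a tokenizer class and a reference checkpoint. As a rough illustration only (the real template and argument handling live in `file_utils.py` and are not part of this diff), such a decorator can be sketched as follows:

    def add_code_sample_docstrings(*docstr, tokenizer_class=None, checkpoint=None):
        # Illustrative sketch, not the actual file_utils implementation:
        # append a doctest-style usage example to the decorated method's
        # docstring, filled in with the model class, tokenizer class and
        # checkpoint supplied by the caller.
        def docstring_decorator(fn):
            model_class = fn.__qualname__.split(".")[0]  # e.g. "MobileBertModel" for MobileBertModel.forward
            sample = """
        Example::

            >>> from transformers import {tokenizer_class}, {model_class}
            >>> import torch

            >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
            >>> model = {model_class}.from_pretrained('{checkpoint}')

            >>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"]
            >>> outputs = model(input_ids)
    """.format(tokenizer_class=tokenizer_class, model_class=model_class, checkpoint=checkpoint)
            fn.__doc__ = (fn.__doc__ or "") + "".join(docstr) + sample
            return fn
        return docstring_decorator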
......@@ -34,11 +34,14 @@ from transformers.modeling_bert import BertIntermediate
from .activations import gelu, gelu_new, swish
from .configuration_mobilebert import MobileBertConfig
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
from .modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer
logger = logging.getLogger(__name__)
_TOKENIZER_FOR_DOC = "MobileBertTokenizer"
MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST = ["google/mobilebert-uncased"]
......@@ -747,6 +750,7 @@ class MobileBertModel(MobileBertPreTrainedModel):
self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING)
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
def forward(
self,
input_ids=None,
......@@ -785,20 +789,6 @@ class MobileBertModel(MobileBertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import MobileBertModel, MobileBertTokenizer
import torch
tokenizer = MobileBertTokenizer.from_pretrained(model_name_or_path)
model = MobileBertModel.from_pretrained(model_name_or_path)
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
......@@ -951,13 +941,17 @@ class MobileBertForPreTraining(MobileBertPreTrainedModel):
heads.
Examples::
from transformers import MobileBertTokenizer, MobileBertForPreTraining
import torch
tokenizer = MobileBertTokenizer.from_pretrained(model_name_or_path)
model = MobileBertForPreTraining.from_pretrained(model_name_or_path)
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
prediction_scores, seq_relationship_scores = outputs[:2]
>>> from transformers import MobileBertTokenizer, MobileBertForPreTraining
>>> import torch
>>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
>>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> outputs = model(input_ids)
>>> prediction_scores, seq_relationship_scores = outputs[:2]
"""
outputs = self.mobilebert(
......@@ -1022,6 +1016,7 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
self._tie_or_clone_weights(output_embeddings, self.get_input_embeddings())
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
def forward(
self,
input_ids=None,
......@@ -1063,20 +1058,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import MobileBertTokenizer, MobileBertForMaskedLM
import torch
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
model = MobileBertForMaskedLM.from_pretrained('mobilebert-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=input_ids)
loss, prediction_scores = outputs[:2]
"""
if "masked_lm_labels" in kwargs:
warnings.warn(
......@@ -1174,18 +1155,17 @@ class MobileBertForNextSentencePrediction(MobileBertPreTrainedModel):
Examples::
from transformers import MobileBertTokenizer, MobileBertForNextSentencePrediction
import torch
>>> from transformers import MobileBertTokenizer, MobileBertForNextSentencePrediction
>>> import torch
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
model = MobileBertForNextSentencePrediction.from_pretrained('mobilebert-uncased')
>>> tokenizer = MobileBertTokenizer.from_pretrained('google/mobilebert-uncased')
>>> model = MobileBertForNextSentencePrediction.from_pretrained('google/mobilebert-uncased')
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
next_sentence = "The sky is blue due to the shorter wavelength of blue light."
encoding = tokenizer.encode_plus(prompt, next_sentence, return_tensors='pt')
>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
>>> encoding = tokenizer(prompt, next_sentence, return_tensors='pt')
loss, logits = model(**encoding, next_sentence_label=torch.LongTensor([1]))
assert logits[0, 0] < logits[0, 1] # next sentence was random
>>> loss, logits = model(**encoding, next_sentence_label=torch.LongTensor([1]))
"""
outputs = self.mobilebert(
......@@ -1228,6 +1208,7 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
self.init_weights()
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING)
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
def forward(
self,
input_ids=None,
......@@ -1263,20 +1244,6 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import BertTokenizer, BertForSequenceClassification
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
loss, logits = outputs[:2]
"""
outputs = self.mobilebert(
......@@ -1321,6 +1288,7 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel):
self.init_weights()
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
def forward(
self,
input_ids=None,
......@@ -1363,25 +1331,6 @@ class MobileBertForQuestionAnswering(MobileBertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import MobileBertTokenizer, MobileBertForQuestionAnswering
import torch
tokenizer = BertTokenizer.from_pretrained(model_name_or_path)
model = MobileBertForQuestionAnswering.from_pretrained(model_name_or_path)
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
encoding = tokenizer.encode_plus(question, text)
input_ids, token_type_ids = encoding["input_ids"], encoding["token_type_ids"]
start_scores, end_scores = model(torch.tensor([input_ids]), token_type_ids=torch.tensor([token_type_ids]))
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
assert answer == "a nice puppet"
"""
outputs = self.mobilebert(
......@@ -1439,6 +1388,7 @@ class MobileBertForMultipleChoice(MobileBertPreTrainedModel):
self.init_weights()
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
def forward(
self,
input_ids=None,
......@@ -1476,25 +1426,6 @@ class MobileBertForMultipleChoice(MobileBertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import MobileBertTokenizer, MobileBertForMultipleChoice
import torch
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
model = MobileBertForMultipleChoice.from_pretrained('mobilebert-uncased')
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
choice0 = "It is eaten with a fork and a knife."
choice1 = "It is eaten while held in the hand."
labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
encoding = tokenizer.batch_encode_plus([[prompt, choice0], [prompt, choice1]], return_tensors='pt', pad_to_max_length=True)
outputs = model(**{k: v.unsqueeze(0) for k,v in encoding.items()}, labels=labels) # batch size is 1
# the linear classifier still needs to be trained
loss, logits = outputs[:2]
"""
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
......@@ -1552,6 +1483,7 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel):
self.init_weights()
@add_start_docstrings_to_callable(MOBILEBERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/mobilebert-uncased")
def forward(
self,
input_ids=None,
......@@ -1586,21 +1518,6 @@ class MobileBertForTokenClassification(MobileBertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import MobileBertTokenizer, MobileBertForTokenClassification
import torch
tokenizer = MobileBertTokenizer.from_pretrained('mobilebert-uncased')
model = MobileBertForTokenClassification.from_pretrained('mobilebert-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
loss, scores = outputs[:2]
"""
outputs = self.mobilebert(
......
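For the MobileBERT changes above, the decorator-generated sample that replaces the deleted inline example is roughly the following doctest; note that the undefined `model_name_or_path` placeholder from the old example becomes the concrete `google/mobilebert-uncased` checkpoint named in the decorator (the exact wording comes from the shared template, so this is indicative rather than a verbatim copy of the generated docstring):

    >>> from transformers import MobileBertTokenizer, MobileBertModel
    >>> import torch

    >>> tokenizer = MobileBertTokenizer.from_pretrained("google/mobilebert-uncased")
    >>> model = MobileBertModel.from_pretrained("google/mobilebert-uncased")

    >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
    >>> outputs = model(input_ids)
    >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple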
......@@ -28,7 +28,7 @@ from torch.nn import CrossEntropyLoss
from .activations import gelu_new, swish
from .configuration_openai import OpenAIGPTConfig
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
from .modeling_utils import (
Conv1D,
PreTrainedModel,
......@@ -40,6 +40,8 @@ from .modeling_utils import (
logger = logging.getLogger(__name__)
_TOKENIZER_FOR_DOC = "OpenAIGPTTokenizer"
OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST = [
"openai-gpt",
# See all OpenAI GPT models at https://huggingface.co/models?filter=openai-gpt
......@@ -356,6 +358,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
self.h[layer].attn.prune_heads(heads)
@add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt")
def forward(
self,
input_ids=None,
......@@ -383,18 +386,6 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import OpenAIGPTTokenizer, OpenAIGPTModel
import torch
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTModel.from_pretrained('openai-gpt')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
......@@ -490,6 +481,7 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
return self.lm_head
@add_start_docstrings_to_callable(OPENAI_GPT_INPUTS_DOCSTRING)
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="openai-gpt")
def forward(
self,
input_ids=None,
......@@ -531,18 +523,6 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
import torch
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=input_ids)
loss, logits = outputs[:2]
"""
transformer_outputs = self.transformer(
input_ids,
......
......@@ -29,12 +29,20 @@ from torch.nn import CrossEntropyLoss
from .activations import gelu, gelu_fast, gelu_new, swish
from .configuration_reformer import ReformerConfig
from .file_utils import DUMMY_INPUTS, DUMMY_MASK, add_start_docstrings, add_start_docstrings_to_callable
from .file_utils import (
DUMMY_INPUTS,
DUMMY_MASK,
add_code_sample_docstrings,
add_start_docstrings,
add_start_docstrings_to_callable,
)
from .modeling_utils import PreTrainedModel, apply_chunking_to_forward
logger = logging.getLogger(__name__)
_TOKENIZER_FOR_DOC = "ReformerTokenizer"
REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
"google/reformer-crime-and-punishment",
"google/reformer-enwik8",
......@@ -1543,6 +1551,7 @@ class ReformerModel(ReformerPreTrainedModel):
self.encoder.layer[layer].attention.prune_heads(heads)
@add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING)
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/reformer-crime-and-punishment")
def forward(
self,
input_ids=None,
......@@ -1570,19 +1579,6 @@ class ReformerModel(ReformerPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import ReformerModel, ReformerTokenizer
import torch
tokenizer = ReformerTokenizer.from_pretrained('google/reformer-crime-and-punishment')
model = ReformerModel.from_pretrained('google/reformer-crime-and-punishment')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
......@@ -1738,6 +1734,7 @@ class ReformerModelWithLMHead(ReformerPreTrainedModel):
pass
@add_start_docstrings_to_callable(REFORMER_INPUTS_DOCSTRING)
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="google/reformer-crime-and-punishment")
def forward(
self,
input_ids=None,
......@@ -1774,19 +1771,6 @@ class ReformerModelWithLMHead(ReformerPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import ReformerModelWithLMHead, ReformerTokenizer
import torch
tokenizer = ReformerTokenizer.from_pretrained('google/reformer-crime-and-punishment')
model = ReformerModelWithLMHead.from_pretrained('google/reformer-crime-and-punishment')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=input_ids)
loss, prediction_scores = outputs[:2]
"""
reformer_outputs = self.reformer(
......
......@@ -24,12 +24,14 @@ import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from .configuration_roberta import RobertaConfig
from .file_utils import add_start_docstrings, add_start_docstrings_to_callable
from .file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_callable
from .modeling_bert import BertEmbeddings, BertLayerNorm, BertModel, BertPreTrainedModel, gelu
logger = logging.getLogger(__name__)
_TOKENIZER_FOR_DOC = "RobertaTokenizer"
ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
"roberta-base",
"roberta-large",
......@@ -177,6 +179,7 @@ class RobertaForMaskedLM(BertPreTrainedModel):
return self.lm_head.decoder
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
def forward(
self,
input_ids=None,
......@@ -216,18 +219,6 @@ class RobertaForMaskedLM(BertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import RobertaTokenizer, RobertaForMaskedLM
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForMaskedLM.from_pretrained('roberta-base')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=input_ids)
loss, prediction_scores = outputs[:2]
"""
if "masked_lm_labels" in kwargs:
warnings.warn(
......@@ -304,6 +295,7 @@ class RobertaForSequenceClassification(BertPreTrainedModel):
self.init_weights()
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
def forward(
self,
input_ids=None,
......@@ -340,19 +332,6 @@ class RobertaForSequenceClassification(BertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained('roberta-base')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
loss, logits = outputs[:2]
"""
outputs = self.roberta(
input_ids,
......@@ -400,6 +379,7 @@ class RobertaForMultipleChoice(BertPreTrainedModel):
self.init_weights()
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, num_choices, sequence_length)"))
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
def forward(
self,
input_ids=None,
......@@ -437,20 +417,6 @@ class RobertaForMultipleChoice(BertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import RobertaTokenizer, RobertaForMultipleChoice
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForMultipleChoice.from_pretrained('roberta-base')
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
input_ids = torch.tensor([tokenizer.encode(s, add_special_tokens=True) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
loss, classification_scores = outputs[:2]
"""
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
......@@ -510,6 +476,7 @@ class RobertaForTokenClassification(BertPreTrainedModel):
self.init_weights()
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
def forward(
self,
input_ids=None,
......@@ -544,19 +511,6 @@ class RobertaForTokenClassification(BertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
from transformers import RobertaTokenizer, RobertaForTokenClassification
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForTokenClassification.from_pretrained('roberta-base')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
loss, scores = outputs[:2]
"""
outputs = self.roberta(
......@@ -632,6 +586,7 @@ class RobertaForQuestionAnswering(BertPreTrainedModel):
self.init_weights()
@add_start_docstrings_to_callable(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
@add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="roberta-base")
def forward(
self,
input_ids=None,
......@@ -674,25 +629,6 @@ class RobertaForQuestionAnswering(BertPreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
# The checkpoint roberta-large is not fine-tuned for question answering. Please see the
# examples/question-answering/run_squad.py example to see how to fine-tune a model to a question answering task.
from transformers import RobertaTokenizer, RobertaForQuestionAnswering
import torch
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForQuestionAnswering.from_pretrained('roberta-base')
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
input_ids = tokenizer.encode(question, text)
start_scores, end_scores = model(torch.tensor([input_ids]))
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
"""
outputs = self.roberta(
......
......@@ -33,6 +33,8 @@ from .modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices, p
logger = logging.getLogger(__name__)
_TOKENIZER_FOR_DOC = "T5Tokenizer"
####################################################
# This dict contrains shortcut names and associated url
# for the pretrained weights provided with the models
......@@ -924,16 +926,17 @@ class T5Model(T5PreTrainedModel):
Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
heads.
Examples::
Example::
>>> from transformers import T5Tokenizer, T5Model
from transformers import T5Tokenizer, T5Model
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = T5Model.from_pretrained('t5-small')
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5Model.from_pretrained('t5-small')
input_ids = tokenizer.encode("Hello, my dog is cute", return_tensors="pt") # Batch size 1
outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
>>> input_ids = tokenizer.encode("Hello, my dog is cute", return_tensors="pt") # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
>>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
"""
use_cache = use_cache if use_cache is not None else self.config.use_cache
......@@ -1068,18 +1071,18 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
Examples::
from transformers import T5Tokenizer, T5ForConditionalGeneration
>>> from transformers import T5Tokenizer, T5ForConditionalGeneration
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')
input_ids = tokenizer.encode("Hello, my dog is cute", return_tensors="pt") # Batch size 1
outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
loss, prediction_scores = outputs[:2]
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = T5ForConditionalGeneration.from_pretrained('t5-small')
>>> input_ids = tokenizer.encode("Hello, my dog is cute", return_tensors="pt") # Batch size 1
>>> outputs = model(input_ids=input_ids, decoder_input_ids=input_ids, labels=input_ids)
>>> loss, prediction_scores = outputs[:2]
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')
input_ids = tokenizer.encode("summarize: Hello, my dog is cute", return_tensors="pt") # Batch size 1
outputs = model.generate(input_ids)
>>> tokenizer = T5Tokenizer.from_pretrained('t5-small')
>>> model = T5ForConditionalGeneration.from_pretrained('t5-small')
>>> input_ids = tokenizer.encode("summarize: Hello, my dog is cute", return_tensors="pt") # Batch size 1
>>> outputs = model.generate(input_ids)
"""
if "lm_labels" in kwargs:
......
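The "Test code samples" half of this PR runs these `>>>` examples as doctests in CI. Independently of the repository's own test harness (which is not shown in this diff), a sample like the ones above can be exercised with the standard library's doctest module, for example (this downloads the referenced checkpoints and can be slow):

    import doctest

    import transformers.modeling_t5 as modeling_t5

    # Collect every `>>>` example in the module's docstrings and execute it.
    results = doctest.testmod(modeling_t5, verbose=False)
    print("attempted:", results.attempted, "failed:", results.failed)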