Unverified Commit ee6674d4 authored by Yih-Dar, committed by GitHub

Fix doc examples: name '...' is not defined (#14687)



* Fix doc examples: name '...' is not defined

* remove >>> and ... in some docstrings in visual_bert
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent e6219320
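
Note that every VisualBERT snippet in this change builds on a `get_visual_embeddings(image)` helper (and an `image` variable) that the docstrings only assume to exist; neither is shipped with `transformers`, so when the examples are collected as doctests they stop with `NameError: name 'get_visual_embeddings' is not defined`, which is why the `>>>`/`...` prompts are stripped from those docstrings. Purely as an illustrative sketch of the assumed helper (the name, region count, and feature dimension below are hypothetical), it could be stubbed like this:

    import torch

    # Hypothetical stub, not part of transformers: VisualBERT expects region-level
    # visual features, typically pooled ROI features from an object detector such
    # as Faster R-CNN. Random features only make the snippets executable; the last
    # dimension must match model.config.visual_embedding_dim of the checkpoint.
    def get_visual_embeddings(image, num_regions=36, feature_dim=2048):
        return torch.randn(num_regions, feature_dim)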
@@ -1764,7 +1764,7 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
... # check ``LongformerModel.forward`` for more details how to set `attention_mask`
>>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
>>> loss = outputs.loss
- >>> prediction_logits = output.logits
+ >>> prediction_logits = outputs.logits
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1189,7 +1189,7 @@ class MegatronBertForCausalLM(MegatronBertPreTrainedModel):
>>> import torch
>>> tokenizer = BertTokenizer.from_pretrained('nvidia/megatron-bert-cased-345m')
- >>> model = MegatronBertLMHeadModel.from_pretrained('nvidia/megatron-bert-cased-345m', is_decoder=True)
+ >>> model = MegatronBertForCausalLM.from_pretrained('nvidia/megatron-bert-cased-345m', is_decoder=True)
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)
@@ -741,27 +741,27 @@ class VisualBertModel(VisualBertPreTrainedModel):
Example::
- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image.
- >>> from transformers import BertTokenizer, VisualBertModel
- >>> import torch
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image.
+ from transformers import BertTokenizer, VisualBertModel
+ import torch
- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertModel.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertModel.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
- >>> inputs = tokenizer("The capital of France is Paris.", return_tensors="pt")
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ inputs = tokenizer("The capital of France is Paris.", return_tensors="pt")
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
- >>> inputs.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
+ inputs.update({
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })
- >>> outputs = model(**inputs)
+ outputs = model(**inputs)
- >>> last_hidden_states = outputs.last_hidden_state
+ last_hidden_states = outputs.last_hidden_state
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -923,31 +923,31 @@ class VisualBertForPreTraining(VisualBertPreTrainedModel):
Example::
- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
- >>> from transformers import BertTokenizer, VisualBertForPreTraining
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForPreTraining
- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForPreTraining.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForPreTraining.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
- >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
- >>> inputs.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
- >>> max_length = inputs["input_ids"].shape[-1]+visual_embeds.shape[-2]
- >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt", padding="max_length", max_length=max_length)["input_ids"]
- >>> sentence_image_labels = torch.tensor(1).unsqueeze(0) # Batch_size
+ inputs.update({
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })
+ max_length = inputs["input_ids"].shape[-1]+visual_embeds.shape[-2]
+ labels = tokenizer("The capital of France is Paris.", return_tensors="pt", padding="max_length", max_length=max_length)["input_ids"]
+ sentence_image_labels = torch.tensor(1).unsqueeze(0) # Batch_size
- >>> outputs = model(**inputs, labels=labels, sentence_image_labels=sentence_image_labels)
- >>> loss = outputs.loss
- >>> prediction_logits = outputs.prediction_logits
- >>> seq_relationship_logits = outputs.seq_relationship_logits
+ outputs = model(**inputs, labels=labels, sentence_image_labels=sentence_image_labels)
+ loss = outputs.loss
+ prediction_logits = outputs.prediction_logits
+ seq_relationship_logits = outputs.seq_relationship_logits
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1057,37 +1057,38 @@ class VisualBertForMultipleChoice(VisualBertPreTrainedModel):
Example::
- >>> from transformers import BertTokenizer, VisualBertForMultipleChoice
- >>> import torch
- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForMultipleChoice.from_pretrained('uclanlp/visualbert-vcr')
- >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
- >>> choice0 = "It is eaten with a fork and a knife."
- >>> choice1 = "It is eaten while held in the hand."
- >>> visual_embeds = get_visual_embeddings(image)
- >>> # (batch_size, num_choices, visual_seq_length, visual_embedding_dim)
- >>> visual_embeds = visual_embeds.expand(1, 2, *visual_embeds.shape)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
- >>> labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
- >>> encoding = tokenizer([[prompt, prompt], [choice0, choice1]], return_tensors='pt', padding=True)
- >>> # batch size is 1
- >>> inputs_dict = {k: v.unsqueeze(0) for k,v in encoding.items()}
- >>> inputs_dict.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_attention_mask": visual_attention_mask,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "labels": labels
- ... })
- >>> outputs = model(**inputs_dict)
- >>> loss = outputs.loss
- >>> logits = outputs.logits
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForMultipleChoice
+ import torch
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForMultipleChoice.from_pretrained('uclanlp/visualbert-vcr')
+ prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
+ choice0 = "It is eaten with a fork and a knife."
+ choice1 = "It is eaten while held in the hand."
+ visual_embeds = get_visual_embeddings(image)
+ # (batch_size, num_choices, visual_seq_length, visual_embedding_dim)
+ visual_embeds = visual_embeds.expand(1, 2, *visual_embeds.shape)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
+ encoding = tokenizer([[prompt, prompt], [choice0, choice1]], return_tensors='pt', padding=True)
+ # batch size is 1
+ inputs_dict = {k: v.unsqueeze(0) for k,v in encoding.items()}
+ inputs_dict.update({
+ "visual_embeds": visual_embeds,
+ "visual_attention_mask": visual_attention_mask,
+ "visual_token_type_ids": visual_token_type_ids,
+ "labels": labels
+ })
+ outputs = model(**inputs_dict)
+ loss = outputs.loss
+ logits = outputs.logits
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
@@ -1204,30 +1205,30 @@ class VisualBertForQuestionAnswering(VisualBertPreTrainedModel):
Example::
- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
- >>> from transformers import BertTokenizer, VisualBertForQuestionAnswering
- >>> import torch
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForQuestionAnswering
+ import torch
- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForQuestionAnswering.from_pretrained('uclanlp/visualbert-vqa')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForQuestionAnswering.from_pretrained('uclanlp/visualbert-vqa')
- >>> text = "Who is eating the apple?"
- >>> inputs = tokenizer(text, return_tensors='pt')
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ text = "Who is eating the apple?"
+ inputs = tokenizer(text, return_tensors='pt')
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
- >>> inputs.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
+ inputs.update({
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })
- >>> labels = torch.tensor([[0.0,1.0]]).unsqueeze(0) # Batch size 1, Num labels 2
+ labels = torch.tensor([[0.0,1.0]]).unsqueeze(0) # Batch size 1, Num labels 2
- >>> outputs = model(**inputs, labels=labels)
- >>> loss = outputs.loss
- >>> scores = outputs.logits
+ outputs = model(**inputs, labels=labels)
+ loss = outputs.loss
+ scores = outputs.logits
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1327,30 +1328,30 @@ class VisualBertForVisualReasoning(VisualBertPreTrainedModel):
Example::
- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
- >>> from transformers import BertTokenizer, VisualBertForVisualReasoning
- >>> import torch
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForVisualReasoning
+ import torch
- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForVisualReasoning.from_pretrained('uclanlp/visualbert-nlvr2')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForVisualReasoning.from_pretrained('uclanlp/visualbert-nlvr2')
- >>> text = "Who is eating the apple?"
- >>> inputs = tokenizer(text, return_tensors='pt')
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ text = "Who is eating the apple?"
+ inputs = tokenizer(text, return_tensors='pt')
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
- >>> inputs.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
+ inputs.update({
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })
- >>> labels = torch.tensor(1).unsqueeze(0) # Batch size 1, Num choices 2
+ labels = torch.tensor(1).unsqueeze(0) # Batch size 1, Num choices 2
- >>> outputs = model(**inputs, labels=labels)
- >>> loss = outputs.loss
- >>> scores = outputs.logits
+ outputs = model(**inputs, labels=labels)
+ loss = outputs.loss
+ scores = outputs.logits
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1488,32 +1489,32 @@ class VisualBertForRegionToPhraseAlignment(VisualBertPreTrainedModel):
Example::
- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
- >>> from transformers import BertTokenizer, VisualBertForRegionToPhraseAlignment
- >>> import torch
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForRegionToPhraseAlignment
+ import torch
- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForRegionToPhraseAlignment.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForRegionToPhraseAlignment.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
- >>> text = "Who is eating the apple?"
- >>> inputs = tokenizer(text, return_tensors='pt')
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
- >>> region_to_phrase_position = torch.ones((1, inputs["input_ids"].shape[-1]+visual_embeds.shape[-2]))
+ text = "Who is eating the apple?"
+ inputs = tokenizer(text, return_tensors='pt')
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ region_to_phrase_position = torch.ones((1, inputs["input_ids"].shape[-1]+visual_embeds.shape[-2]))
- >>> inputs.update({
- ... "region_to_phrase_position": region_to_phrase_position,
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
+ inputs.update({
+ "region_to_phrase_position": region_to_phrase_position,
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })
- >>> labels = torch.ones((1, inputs["input_ids"].shape[-1]+visual_embeds.shape[-2], visual_embeds.shape[-2])) # Batch size 1
+ labels = torch.ones((1, inputs["input_ids"].shape[-1]+visual_embeds.shape[-2], visual_embeds.shape[-2])) # Batch size 1
- >>> outputs = model(**inputs, labels=labels)
- >>> loss = outputs.loss
- >>> scores = outputs.logits
+ outputs = model(**inputs, labels=labels)
+ loss = outputs.loss
+ scores = outputs.logits
"""
if region_to_phrase_position is None:
raise ValueError("`region_to_phrase_position` should not be None when using Flickr Model.")
@@ -1517,7 +1517,7 @@ class Wav2Vec2ForMaskedLM(Wav2Vec2PreTrainedModel):
Example::
- >>> from transformers import Wav2Vec2Processor, Wav2Vec2Model
+ >>> from transformers import Wav2Vec2Processor, Wav2Vec2ForMaskedLM
>>> from datasets import load_dataset
>>> import soundfile as sf