chenpangpang / transformers · Commits

Commit 6ec1ee9e
Authored Aug 06, 2019 by wangfei

Fix examples in docstring

parent 72622926
Showing 4 changed files with 114 additions and 114 deletions.
pytorch_transformers/modeling_bert.py      +46  -46
pytorch_transformers/modeling_openai.py    +17  -17
pytorch_transformers/modeling_xlm.py       +23  -23
pytorch_transformers/modeling_xlnet.py     +28  -28
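The change itself is mechanical: every "Examples::" block in these four modules loses its leading ">>>" doctest prompts, so the snippets render in the generated docs as plain literal code blocks that can be copied and run as-is. A minimal sketch of the docstring shape after the change (the class below is a made-up placeholder, not code from this diff):

    # Hypothetical module illustrating the documentation style only.
    class ToyModel(object):
        r"""Toy model used to show the "Examples::" convention after this commit.

        Examples::

            model = ToyModel()            # no ">>>" prompt in front of the lines
            output = model.run("hello")   # reads as copy-pasteable code in the rendered docs
        """

        def run(self, text):
            return text.upper()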
pytorch_transformers/modeling_bert.py
...
...
@@ -643,11 +643,11 @@ class BertModel(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertModel.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertModel.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple

     """

     def __init__(self, config):
...
...
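The docstring snippets assume torch and the model classes are already imported; a self-contained version of the fixed BertModel example would look roughly like the sketch below (the pytorch_transformers import path matches this repository, and loading 'bert-base-uncased' downloads the pretrained weights on first use):

    # Self-contained sketch of the BertModel example above.
    import torch
    from pytorch_transformers import BertTokenizer, BertModel

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')
    model.eval()  # disable dropout so the hidden states are deterministic

    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    with torch.no_grad():
        outputs = model(input_ids)

    last_hidden_states = outputs[0]  # (batch_size, sequence_length, hidden_size)
    print(last_hidden_states.shape)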
@@ -753,11 +753,11 @@ class BertForPreTraining(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForPreTraining.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> prediction_scores, seq_relationship_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForPreTraining.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        prediction_scores, seq_relationship_scores = outputs[:2]

     """

     def __init__(self, config):
...
...
@@ -821,11 +821,11 @@ class BertForMaskedLM(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForMaskedLM.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, masked_lm_labels=input_ids)
-        >>> loss, prediction_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForMaskedLM.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, masked_lm_labels=input_ids)
+        loss, prediction_scores = outputs[:2]

     """

     def __init__(self, config):
...
...
@@ -886,11 +886,11 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> seq_relationship_scores = outputs[0]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        seq_relationship_scores = outputs[0]

     """

     def __init__(self, config):
...
...
@@ -944,12 +944,12 @@ class BertForSequenceClassification(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, logits = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, logits = outputs[:2]

     """

     def __init__(self, config):
...
...
@@ -1048,13 +1048,13 @@ class BertForMultipleChoice(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
-        >>> choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
-        >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
-        >>> labels = torch.tensor(1).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, classification_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
+        choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
+        input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
+        labels = torch.tensor(1).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, classification_scores = outputs[:2]

     """

     def __init__(self, config):
...
...
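One caveat for the multiple-choice example above: torch.tensor([tokenizer.encode(s) for s in choices]) only works when every choice encodes to the same number of tokens; otherwise the nested list is ragged and torch.tensor raises an error. A small sketch that pads the choices to a common length first (the pad id of 0 is an illustrative assumption, not something the docstring specifies):

    # Pad the encoded choices to equal length before stacking them into one tensor.
    # Assumes `tokenizer` and `choices` are defined as in the example above.
    import torch

    encoded = [tokenizer.encode(s) for s in choices]
    max_len = max(len(ids) for ids in encoded)
    padded = [ids + [0] * (max_len - len(ids)) for ids in encoded]  # 0 used as a placeholder pad id
    input_ids = torch.tensor(padded).unsqueeze(0)  # shape: (1, num_choices, max_len)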
@@ -1116,12 +1116,12 @@ class BertForTokenClassification(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForTokenClassification.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForTokenClassification.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, scores = outputs[:2]

     """

     def __init__(self, config):
...
...
@@ -1190,13 +1190,13 @@ class BertForQuestionAnswering(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> start_positions = torch.tensor([1])
-        >>> end_positions = torch.tensor([3])
-        >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-        >>> loss, start_scores, end_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        start_positions = torch.tensor([1])
+        end_positions = torch.tensor([3])
+        outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
+        loss, start_scores, end_scores = outputs[:2]

     """

     def __init__(self, config):
...
...
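One thing the commit leaves untouched: the question-answering examples here (and the XLM/XLNet variants further down) unpack three names from outputs[:2], which at runtime would raise "ValueError: not enough values to unpack". Assuming the model returns (loss, start_scores, end_scores, ...) when both position tensors are supplied, the working form would presumably be:

    # Sketch of the unpacking fix; `outputs` comes from
    # model(input_ids, start_positions=start_positions, end_positions=end_positions) as above.
    loss, start_scores, end_scores = outputs[:3]
    print(loss.item(), start_scores.shape, end_scores.shape)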
pytorch_transformers/modeling_openai.py
...
...
@@ -439,11 +439,11 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
     Examples::

-        >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
-        >>> model = OpenAIGPTModel.from_pretrained('openai-gpt')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
+        tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
+        model = OpenAIGPTModel.from_pretrained('openai-gpt')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple

     """

     def __init__(self, config):
...
...
@@ -557,11 +557,11 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
     Examples::

-        >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
-        >>> model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=input_ids)
-        >>> loss, logits = outputs[:2]
+        tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
+        model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=input_ids)
+        loss, logits = outputs[:2]

     """

     def __init__(self, config):
...
...
@@ -663,13 +663,13 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
     Examples::

-        >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
-        >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
-        >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]  # Assume you've added [CLS] to the vocabulary
-        >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
-        >>> mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, mc_token_ids)
-        >>> lm_prediction_scores, mc_prediction_scores = outputs[:2]
+        tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
+        model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
+        choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]  # Assume you've added [CLS] to the vocabulary
+        input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
+        mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, mc_token_ids)
+        lm_prediction_scores, mc_prediction_scores = outputs[:2]

     """

     def __init__(self, config):
...
...
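The double-heads example leans on the comment "Assume you've added [CLS] to the vocabulary" but does not show how. One hedged way to do that, assuming the add_tokens / resize_token_embeddings helpers available in pytorch_transformers 1.x, is sketched below; if your version lacks these helpers the token has to be added by other means:

    # Sketch: add the [CLS] token the OpenAIGPTDoubleHeadsModel example assumes.
    # Relies on tokenizer.add_tokens and model.resize_token_embeddings being available.
    from pytorch_transformers import OpenAIGPTTokenizer, OpenAIGPTDoubleHeadsModel

    tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
    model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')

    tokenizer.add_tokens(['[CLS]'])                # extend the vocabulary with the new token
    model.resize_token_embeddings(len(tokenizer))  # grow the embedding matrix to match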
pytorch_transformers/modeling_xlm.py
...
...
@@ -472,11 +472,11 @@ class XLMModel(XLMPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMModel.from_pretrained('xlm-mlm-en-2048')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
+        tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
+        model = XLMModel.from_pretrained('xlm-mlm-en-2048')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple

     """

     ATTRIBUTES = ['encoder', 'eos_index', 'pad_index',  # 'with_output',
...
...
@@ -744,11 +744,11 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
+        tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
+        model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple

     """

     def __init__(self, config):
...
...
@@ -803,12 +803,12 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, logits = outputs[:2]
+        tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
+        model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, logits = outputs[:2]

     """

     def __init__(self, config):
...
...
@@ -881,13 +881,13 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> start_positions = torch.tensor([1])
-        >>> end_positions = torch.tensor([3])
-        >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-        >>> loss, start_scores, end_scores = outputs[:2]
+        tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
+        model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        start_positions = torch.tensor([1])
+        end_positions = torch.tensor([3])
+        outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
+        loss, start_scores, end_scores = outputs[:2]

     """

     def __init__(self, config):
...
...
pytorch_transformers/modeling_xlnet.py
...
...
@@ -712,11 +712,11 @@ class XLNetModel(XLNetPreTrainedModel):
     Examples::

-        >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
-        >>> model = XLNetModel.from_pretrained('xlnet-large-cased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
+        tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
+        model = XLNetModel.from_pretrained('xlnet-large-cased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple

     """

     def __init__(self, config):
...
...
@@ -1018,16 +1018,16 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
     Examples::

-        >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
-        >>> model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
-        >>> # We show how to setup inputs to predict a next token using a bi-directional context.
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>")).unsqueeze(0)  # We will predict the masked token
-        >>> perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
-        >>> perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
-        >>> target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float)  # Shape [1, 1, seq_length] => let's predict one token
-        >>> target_mapping[0, 0, -1] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
-        >>> outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
-        >>> next_token_logits = outputs[0]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
+        tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
+        model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
+        # We show how to setup inputs to predict a next token using a bi-directional context.
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>")).unsqueeze(0)  # We will predict the masked token
+        perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
+        perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
+        target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float)  # Shape [1, 1, seq_length] => let's predict one token
+        target_mapping[0, 0, -1] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
+        outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
+        next_token_logits = outputs[0]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]

     """

     def __init__(self, config):
...
...
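The XLNet LM-head example stops at next_token_logits; to see the actual prediction you can take the argmax over the vocabulary and map the id back through the tokenizer. A short follow-on sketch, assuming the variables from the example above are in scope:

    # Turn next_token_logits from the example above into a readable prediction.
    # Its shape is (1, 1, vocab_size) because target_mapping selected a single position.
    import torch

    predicted_id = torch.argmax(next_token_logits[0, 0]).item()
    predicted_token = tokenizer.convert_ids_to_tokens([predicted_id])[0]
    print(predicted_token)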
@@ -1098,12 +1098,12 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
     Examples::

-        >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
-        >>> model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, logits = outputs[:2]
+        tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
+        model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, logits = outputs[:2]

     """

     def __init__(self, config):
...
...
@@ -1196,13 +1196,13 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> start_positions = torch.tensor([1])
-        >>> end_positions = torch.tensor([3])
-        >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-        >>> loss, start_scores, end_scores = outputs[:2]
+        tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
+        model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        start_positions = torch.tensor([1])
+        end_positions = torch.tensor([3])
+        outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
+        loss, start_scores, end_scores = outputs[:2]

     """

     def __init__(self, config):
...
...
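Note that this last example sits in the XLNetForQuestionAnswering docstring but still instantiates XLMTokenizer and XLMForQuestionAnswering and mixes the 'xlm-mlm-en-2048' and 'xlnet-large-cased' checkpoints; the commit only strips the prompts. A hedged sketch of what an internally consistent XLNet version would presumably look like:

    # Sketch using the XLNet classes and checkpoint named elsewhere in this diff;
    # this is an editorial reconstruction, not part of the commit.
    import torch
    from pytorch_transformers import XLNetTokenizer, XLNetForQuestionAnswering

    tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
    model = XLNetForQuestionAnswering.from_pretrained('xlnet-large-cased')

    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    start_positions = torch.tensor([1])
    end_positions = torch.tensor([3])
    outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
    loss = outputs[0]  # total span loss when the target positions are provided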