Commit 6ec1ee9e authored by wangfei

Fix examples in docstring

parent 72622926
@@ -643,11 +643,11 @@ class BertModel(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertModel.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertModel.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
     """

     def __init__(self, config):
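
For context, the fixed BertModel example runs end-to-end once imports are added. A minimal sketch, assuming the library at this commit imports as pytorch_transformers (the import path and the printed shape are assumptions, not part of the diff):

    # Self-contained version of the BertModel docstring example above.
    # Assumption: the package imports as pytorch_transformers at this commit.
    import torch
    from pytorch_transformers import BertModel, BertTokenizer

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # (batch_size, sequence_length, hidden_size)
    print(last_hidden_states.shape)  # e.g. torch.Size([1, 6, 768]) for bert-base-uncased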
@@ -753,11 +753,11 @@ class BertForPreTraining(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForPreTraining.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> prediction_scores, seq_relationship_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForPreTraining.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids)
+        prediction_scores, seq_relationship_scores = outputs[:2]
     """

     def __init__(self, config):
@@ -821,11 +821,11 @@ class BertForMaskedLM(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForMaskedLM.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids, masked_lm_labels=input_ids)
-        >>> loss, prediction_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForMaskedLM.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids, masked_lm_labels=input_ids)
+        loss, prediction_scores = outputs[:2]
     """

     def __init__(self, config):
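
To inspect the predictions rather than just the loss in the BertForMaskedLM example, take an argmax over prediction_scores; a short illustrative sketch continuing the example (the decoding lines are not part of the diff):

    # prediction_scores has shape (batch_size, seq_len, vocab_size).
    predicted_ids = torch.argmax(prediction_scores, dim=-1)  # (1, seq_len)
    # Map ids back to wordpieces for a quick sanity check.
    print(tokenizer.convert_ids_to_tokens(predicted_ids[0].tolist()))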
@@ -886,11 +886,11 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> seq_relationship_scores = outputs[0]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids)
+        seq_relationship_scores = outputs[0]
     """

     def __init__(self, config):
@@ -944,12 +944,12 @@ class BertForSequenceClassification(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, logits = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, logits = outputs[:2]
     """

     def __init__(self, config):
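
To turn the classification logits from the example above into class probabilities, apply a softmax over the label dimension; a one-line illustrative sketch (not part of the diff):

    # logits has shape (batch_size, num_labels); rows of probs sum to 1.
    probs = torch.nn.functional.softmax(logits, dim=-1)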
@@ -1048,13 +1048,13 @@ class BertForMultipleChoice(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
-        >>> choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
-        >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
-        >>> labels = torch.tensor(1).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, classification_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
+        choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
+        input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
+        labels = torch.tensor(1).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, classification_scores = outputs[:2]
     """

     def __init__(self, config):
@@ -1116,12 +1116,12 @@ class BertForTokenClassification(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForTokenClassification.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForTokenClassification.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, scores = outputs[:2]
     """

     def __init__(self, config):
@@ -1190,13 +1190,13 @@ class BertForQuestionAnswering(BertPreTrainedModel):
     Examples::

-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> start_positions = torch.tensor([1])
-        >>> end_positions = torch.tensor([3])
-        >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-        >>> loss, start_scores, end_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        start_positions = torch.tensor([1])
+        end_positions = torch.tensor([3])
+        outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
+        loss, start_scores, end_scores = outputs[:3]
     """

     def __init__(self, config):
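
Given start_scores and end_scores from the BertForQuestionAnswering example, the most likely answer span can be read off with an argmax; an illustrative sketch, assuming tokenizer.decode is available in this release:

    # start_scores and end_scores have shape (batch_size, seq_len).
    start_index = torch.argmax(start_scores, dim=1).item()
    end_index = torch.argmax(end_scores, dim=1).item()
    answer_ids = input_ids[0, start_index:end_index + 1].tolist()
    print(tokenizer.decode(answer_ids))  # joins wordpieces back into text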
...
@@ -439,11 +439,11 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
     Examples::

-        >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
-        >>> model = OpenAIGPTModel.from_pretrained('openai-gpt')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
+        tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
+        model = OpenAIGPTModel.from_pretrained('openai-gpt')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
     """

     def __init__(self, config):
@@ -557,11 +557,11 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
     Examples::

-        >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
-        >>> model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids, labels=input_ids)
-        >>> loss, logits = outputs[:2]
+        tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
+        model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids, labels=input_ids)
+        loss, logits = outputs[:2]
     """

     def __init__(self, config):
@@ -663,13 +663,13 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
     Examples::

-        >>> tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
-        >>> model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
-        >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary
-        >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
-        >>> mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids, mc_token_ids)
-        >>> lm_prediction_scores, mc_prediction_scores = outputs[:2]
+        tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
+        model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
+        choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary
+        input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
+        mc_token_ids = torch.tensor([input_ids.size(-1) - 1, input_ids.size(-1) - 1]).unsqueeze(0) # Batch size 1; index of the last token
+        outputs = model(input_ids, mc_token_ids)
+        lm_prediction_scores, mc_prediction_scores = outputs[:2]
     """

     def __init__(self, config):
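
The "added [CLS] to the vocabulary" assumption in the OpenAIGPTDoubleHeadsModel example can be satisfied before encoding; a sketch assuming the add_tokens and resize_token_embeddings APIs exist in the installed release (check your version):

    # Register the [CLS] classification token the example relies on.
    num_added = tokenizer.add_tokens(['[CLS]'])    # extend the tokenizer vocabulary
    model.resize_token_embeddings(len(tokenizer))  # grow the embedding matrix to match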
...
@@ -472,11 +472,11 @@ class XLMModel(XLMPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMModel.from_pretrained('xlm-mlm-en-2048')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
+        tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
+        model = XLMModel.from_pretrained('xlm-mlm-en-2048')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
     """

     ATTRIBUTES = ['encoder', 'eos_index', 'pad_index', # 'with_output',
@@ -744,11 +744,11 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
+        tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
+        model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids)
+        prediction_scores = outputs[0] # For the LM head model, the prediction scores are the first element of the output tuple
     """

     def __init__(self, config):
@@ -803,12 +803,12 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, logits = outputs[:2]
+        tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
+        model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, logits = outputs[:2]
     """

     def __init__(self, config):
@@ -881,13 +881,13 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> start_positions = torch.tensor([1])
-        >>> end_positions = torch.tensor([3])
-        >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-        >>> loss, start_scores, end_scores = outputs[:2]
+        tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
+        model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        start_positions = torch.tensor([1])
+        end_positions = torch.tensor([3])
+        outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
+        loss, start_scores, end_scores = outputs[:3]
     """

     def __init__(self, config):
...
@@ -712,11 +712,11 @@ class XLNetModel(XLNetPreTrainedModel):
     Examples::

-        >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
-        >>> model = XLNetModel.from_pretrained('xlnet-large-cased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
+        tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
+        model = XLNetModel.from_pretrained('xlnet-large-cased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
     """

     def __init__(self, config):
@@ -1018,16 +1018,16 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
     Examples::

-        >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
-        >>> model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
-        >>> # We show how to setup inputs to predict a next token using a bi-directional context.
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>")).unsqueeze(0) # We will predict the masked token
-        >>> perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
-        >>> perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
-        >>> target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token
-        >>> target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
-        >>> outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
-        >>> next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
+        tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
+        model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
+        # We show how to set up inputs to predict a next token using a bi-directional context.
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>")).unsqueeze(0) # We will predict the masked token
+        perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
+        perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
+        target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token
+        target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
+        outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
+        next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
     """

     def __init__(self, config):
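
The predicted token can then be read out of next_token_logits; a short illustrative sketch continuing the XLNetLMHeadModel example above (the decoding lines are not part of the diff):

    # next_token_logits has shape (1, 1, vocab_size): one prediction,
    # matching the single row of target_mapping.
    predicted_id = torch.argmax(next_token_logits[0, 0]).item()
    print(tokenizer.convert_ids_to_tokens([predicted_id]))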
@@ -1098,12 +1098,12 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
     Examples::

-        >>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
-        >>> model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, logits = outputs[:2]
+        tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
+        model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, logits = outputs[:2]
     """

     def __init__(self, config):
@@ -1196,13 +1196,13 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
     Examples::

-        >>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
-        >>> model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
-        >>> start_positions = torch.tensor([1])
-        >>> end_positions = torch.tensor([3])
-        >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-        >>> loss, start_scores, end_scores = outputs[:2]
+        tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
+        model = XLNetForQuestionAnswering.from_pretrained('xlnet-large-cased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
+        start_positions = torch.tensor([1])
+        end_positions = torch.tensor([3])
+        outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
+        loss, start_scores, end_scores = outputs[:3]
     """

     def __init__(self, config):
...