Unverified Commit a80aa03b authored by Thomas Wolf's avatar Thomas Wolf Committed by GitHub
Browse files

Merge pull request #973 from FeiWang96/bert_config

 Fix examples of loading pretrained models in docstring
parents 4fc9f9ef 6ec1ee9e
......@@ -643,9 +643,8 @@ class BertModel(BertPreTrainedModel):
Examples::
config = BertConfig.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel(config)
model = BertModel.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
......@@ -754,10 +753,8 @@ class BertForPreTraining(BertPreTrainedModel):
Examples::
config = BertConfig.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForPreTraining(config)
model = BertForPreTraining.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
prediction_scores, seq_relationship_scores = outputs[:2]
......@@ -824,10 +821,8 @@ class BertForMaskedLM(BertPreTrainedModel):
Examples::
config = BertConfig.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM(config)
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids, masked_lm_labels=input_ids)
loss, prediction_scores = outputs[:2]
......@@ -891,10 +886,8 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
Examples::
config = BertConfig.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForNextSentencePrediction(config)
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
seq_relationship_scores = outputs[0]
......@@ -951,10 +944,8 @@ class BertForSequenceClassification(BertPreTrainedModel):
Examples::
config = BertConfig.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification(config)
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
......@@ -1057,10 +1048,8 @@ class BertForMultipleChoice(BertPreTrainedModel):
Examples::
config = BertConfig.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMultipleChoice(config)
model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
labels = torch.tensor(1).unsqueeze(0) # Batch size 1
......@@ -1127,10 +1116,8 @@ class BertForTokenClassification(BertPreTrainedModel):
Examples::
config = BertConfig.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForTokenClassification(config)
model = BertForTokenClassification.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
......@@ -1203,10 +1190,8 @@ class BertForQuestionAnswering(BertPreTrainedModel):
Examples::
config = BertConfig.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForQuestionAnswering(config)
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
......
......@@ -433,9 +433,8 @@ class GPT2Model(GPT2PreTrainedModel):
Examples::
config = GPT2Config.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model(config)
model = GPT2Model.from_pretrained('gpt2')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
......@@ -567,9 +566,8 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
Examples::
config = GPT2Config.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel(config)
model = GPT2LMHeadModel.from_pretrained('gpt2')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=input_ids)
loss, logits = outputs[:2]
......@@ -683,9 +681,8 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
Examples::
config = GPT2Config.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2DoubleHeadsModel(config)
model = GPT2DoubleHeadsModel.from_pretrained('gpt2')
choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1
......
......@@ -439,9 +439,8 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
Examples::
config = OpenAIGPTConfig.from_pretrained('openai-gpt')
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTModel(config)
model = OpenAIGPTModel.from_pretrained('openai-gpt')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
......@@ -558,9 +557,8 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
Examples::
config = OpenAIGPTConfig.from_pretrained('openai-gpt')
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTLMHeadModel(config)
model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=input_ids)
loss, logits = outputs[:2]
......@@ -665,9 +663,8 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
Examples::
config = OpenAIGPTConfig.from_pretrained('openai-gpt')
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTDoubleHeadsModel(config)
model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt')
choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"] # Assume you've added [CLS] to the vocabulary
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0) # Batch size 1
......
......@@ -968,9 +968,8 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
Examples::
config = TransfoXLConfig.from_pretrained('transfo-xl-wt103')
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
model = TransfoXLModel(config)
model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states, mems = outputs[:2]
......@@ -1284,9 +1283,8 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
Examples::
config = TransfoXLConfig.from_pretrained('transfo-xl-wt103')
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
model = TransfoXLLMHeadModel(config)
model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
prediction_scores, mems = outputs[:2]
......
......@@ -472,9 +472,8 @@ class XLMModel(XLMPreTrainedModel):
Examples::
config = XLMConfig.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMModel(config)
model = XLMModel.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
......@@ -745,9 +744,8 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
Examples::
config = XLMConfig.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMWithLMHeadModel(config)
model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
......@@ -805,10 +803,8 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
Examples::
config = XLMConfig.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForSequenceClassification(config)
model = XLMForSequenceClassification.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
......@@ -885,10 +881,8 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
Examples::
config = XLMConfig.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnswering(config)
model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
......
......@@ -712,9 +712,8 @@ class XLNetModel(XLNetPreTrainedModel):
Examples::
config = XLNetConfig.from_pretrained('xlnet-large-cased')
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetModel(config)
model = XLNetModel.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
......@@ -1019,9 +1018,8 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
Examples::
config = XLNetConfig.from_pretrained('xlnet-large-cased')
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetLMHeadModel(config)
model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
# We show how to setup inputs to predict a next token using a bi-directional context.
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>")).unsqueeze(0) # We will predict the masked token
perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
......@@ -1100,10 +1098,8 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
Examples::
config = XLNetConfig.from_pretrained('xlnet-large-cased')
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetForSequenceClassification(config)
model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
outputs = model(input_ids, labels=labels)
......@@ -1200,10 +1196,8 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
Examples::
config = XLMConfig.from_pretrained('xlm-mlm-en-2048')
tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
model = XLMForQuestionAnswering(config)
model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1
start_positions = torch.tensor([1])
end_positions = torch.tensor([3])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment