Commit bfbe52ec authored by thomwolf

cleaning up example docstrings

parent 4cc1bf81
@@ -84,12 +84,12 @@ def bertTokenizer(*args, **kwargs):
Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
Example:
import torch
sentence = 'Hello, World!'
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
toks = tokenizer.tokenize(sentence)
['Hello', '##,', 'World', '##!']
ids = tokenizer.convert_tokens_to_ids(toks)
[8667, 28136, 1291, 28125]
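# As a quick round-trip check (a sketch), the ids map back to the same word pieces:
toks_back = tokenizer.convert_ids_to_tokens(ids)
assert toks_back == toks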
""" """
tokenizer = BertTokenizer.from_pretrained(*args, **kwargs) tokenizer = BertTokenizer.from_pretrained(*args, **kwargs)
...@@ -105,20 +105,20 @@ def bertModel(*args, **kwargs): ...@@ -105,20 +105,20 @@ def bertModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
# Load bertModel
model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
model.eval()
# Predict hidden states features for each layer
with torch.no_grad():
    encoded_layers, _ = model(tokens_tensor, segments_tensors)
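# encoded_layers holds the last layer's hidden states; a sanity-check sketch,
# assuming the 768-dim bert-base configuration:
assert encoded_layers.shape == (1, len(indexed_tokens), 768)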
""" """
model = BertModel.from_pretrained(*args, **kwargs) model = BertModel.from_pretrained(*args, **kwargs)
...@@ -134,20 +134,20 @@ def bertForNextSentencePrediction(*args, **kwargs): ...@@ -134,20 +134,20 @@ def bertForNextSentencePrediction(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
# Load bertForNextSentencePrediction
model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
model.eval()
# Predict the next sentence classification logits
with torch.no_grad():
    next_sent_classif_logits = model(tokens_tensor, segments_tensors)
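# A sketch of turning the two logits into an "is next sentence" probability
# (in BERT's convention, index 0 is the "is next" class):
next_sent_probs = torch.nn.functional.softmax(next_sent_classif_logits[0], dim=-1)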
""" """
model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs) model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs)
...@@ -164,17 +164,17 @@ def bertForPreTraining(*args, **kwargs): ...@@ -164,17 +164,17 @@ def bertForPreTraining(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
# Load bertForPreTraining
model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
"""
model = BertForPreTraining.from_pretrained(*args, **kwargs)
return model
@@ -188,25 +188,25 @@ def bertForMaskedLM(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
masked_index = 8
tokenized_text[masked_index] = '[MASK]'
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
# Load bertForMaskedLM
model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
model.eval()
# Predict all tokens
with torch.no_grad():
    predictions = model(tokens_tensor, segments_tensors)
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
'henson'
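# A sketch of inspecting the five most likely candidates for the masked position:
top5_values, top5_indices = torch.topk(predictions[0, masked_index], 5)
top5_tokens = tokenizer.convert_ids_to_tokens(top5_indices.tolist())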
""" """
model = BertForMaskedLM.from_pretrained(*args, **kwargs) model = BertForMaskedLM.from_pretrained(*args, **kwargs)
...@@ -230,24 +230,24 @@ def bertForSequenceClassification(*args, **kwargs): ...@@ -230,24 +230,24 @@ def bertForSequenceClassification(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
# Load bertForSequenceClassification
model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
model.eval()
# Predict the sequence classification logits
with torch.no_grad():
    seq_classif_logits = model(tokens_tensor, segments_tensors)
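# The predicted class can then be read off the logits (a sketch):
predicted_label = torch.argmax(seq_classif_logits[0]).item()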
# Or get the sequence classification loss
labels = torch.tensor([1])
seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # call model.train() first when training with this loss
"""
model = BertForSequenceClassification.from_pretrained(*args, **kwargs)
return model
@@ -265,24 +265,24 @@ def bertForMultipleChoice(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
# Load bertForMultipleChoice
model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
model.eval()
# Predict the multiple choice logits
with torch.no_grad():
    multiple_choice_logits = model(tokens_tensor, segments_tensors)
# Or get the multiple choice loss
labels = torch.tensor([1])
multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels)  # call model.train() first when training with this loss
"""
model = BertForMultipleChoice.from_pretrained(*args, **kwargs)
return model
@@ -298,25 +298,25 @@ def bertForQuestionAnswering(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
# Load bertForQuestionAnswering
model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
model.eval()
# Predict the start and end position logits
with torch.no_grad():
    start_logits, end_logits = model(tokens_tensor, segments_tensors)
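# A sketch of decoding the most likely answer span from the logits (greedy,
# assuming the predicted start comes before the predicted end):
answer_start = torch.argmax(start_logits).item()
answer_end = torch.argmax(end_logits).item()
answer_tokens = tokenized_text[answer_start:answer_end + 1]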
# Or get the total loss, which is the sum of the CrossEntropy losses for the start and end token positions
start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
total_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)  # call model.train() first when training with this loss
"""
model = BertForQuestionAnswering.from_pretrained(*args, **kwargs)
return model
@@ -337,24 +337,24 @@ def bertForTokenClassification(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])
# Load bertForTokenClassification
model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
model.eval()
# Predict the token classification logits
with torch.no_grad():
    classif_logits = model(tokens_tensor, segments_tensors)
# Or get the token classification loss
labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # call model.train() first when training with this loss
"""
model = BertForTokenClassification.from_pretrained(*args, **kwargs)
return model
@@ -52,11 +52,11 @@ def gpt2Tokenizer(*args, **kwargs):
Default: None
Example:
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
text = "Who was Jim Henson ?"
indexed_tokens = tokenizer.encode(text)
"""
tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
return tokenizer
@@ -71,24 +71,24 @@ def gpt2Model(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
indexed_tokens_1 = tokenizer.encode(text_1)
indexed_tokens_2 = tokenizer.encode(text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load gpt2Model
model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
model.eval()
# Predict hidden states features for each layer
# past can be used to reuse precomputed hidden states in subsequent predictions
with torch.no_grad():
    hidden_states_1, past = model(tokens_tensor_1)
    hidden_states_2, past = model(tokens_tensor_2, past=past)
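# A sanity-check sketch (assuming the 768-dim base gpt2 configuration): the second
# call only computes states for the new tokens, reusing `past` for text_1:
assert hidden_states_2.shape == (1, len(indexed_tokens_2), 768)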
""" """
...@@ -104,31 +104,31 @@ def gpt2LMHeadModel(*args, **kwargs): ...@@ -104,31 +104,31 @@ def gpt2LMHeadModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
indexed_tokens_1 = tokenizer.encode(text_1)
indexed_tokens_2 = tokenizer.encode(text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load gpt2LMHeadModel
model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
model.eval()
# Predict language modeling logits
# past can be used to reuse precomputed hidden states in subsequent predictions
with torch.no_grad():
    predictions_1, past = model(tokens_tensor_1)
    predictions_2, past = model(tokens_tensor_2, past=past)
# Get the predicted last token
predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
predicted_token = tokenizer.decode([predicted_index])
assert predicted_token == ' who'
"""
model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
return model
@@ -143,25 +143,25 @@ def gpt2DoubleHeadsModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
tokenized_text1 = tokenizer.tokenize(text1)
tokenized_text2 = tokenizer.tokenize(text2)
indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
# NB: both choices must have the same token length to be batched this way; pad the shorter one if they differ
tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# Load gpt2DoubleHeadsModel
model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
model.eval()
# Predict language modeling and multiple choice logits
with torch.no_grad():
    lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids)
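# The preferred continuation can then be read off the choice logits (a sketch):
predicted_choice = torch.argmax(multiple_choice_logits[0]).item()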
""" """
model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs) model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs)
......
...@@ -76,12 +76,12 @@ def openAIGPTTokenizer(*args, **kwargs): ...@@ -76,12 +76,12 @@ def openAIGPTTokenizer(*args, **kwargs):
Default: None
Example:
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
text = "Who was Jim Henson ? Jim Henson was a puppeteer"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
[763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483]
"""
tokenizer = OpenAIGPTTokenizer.from_pretrained(*args, **kwargs)
@@ -97,21 +97,21 @@ def openAIGPTModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
# Prepare tokenized input
text = "Who was Jim Henson ? Jim Henson was a puppeteer"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
tokens_tensor = torch.tensor([indexed_tokens])
# Load openAIGPTModel
model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
model.eval()
# Predict hidden states features for each layer
with torch.no_grad():
    hidden_states = model(tokens_tensor)
"""
model = OpenAIGPTModel.from_pretrained(*args, **kwargs)
@@ -126,26 +126,26 @@ def openAIGPTLMHeadModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
# Prepare tokenized input
text = "Who was Jim Henson ? Jim Henson was a puppeteer"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
tokens_tensor = torch.tensor([indexed_tokens])
# Load openAIGPTLMHeadModel
model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
model.eval()
# Predict language modeling logits
with torch.no_grad():
    predictions = model(tokens_tensor)
# Get the predicted last token
predicted_index = torch.argmax(predictions[0, -1, :]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
'.</w>'
"""
model = OpenAIGPTLMHeadModel.from_pretrained(*args, **kwargs)
@@ -161,25 +161,25 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
# Prepare tokenized input
text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
tokenized_text1 = tokenizer.tokenize(text1)
tokenized_text2 = tokenizer.tokenize(text2)
indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
# NB: both choices must have the same token length to be batched this way; pad the shorter one if they differ
tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# Load openAIGPTDoubleHeadsModel
model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
model.eval()
# Predict language modeling and multiple choice logits
with torch.no_grad():
    lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids)
"""
model = OpenAIGPTDoubleHeadsModel.from_pretrained(*args, **kwargs)
@@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs):
* transfo-xl-wt103
Example:
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
text = "Who was Jim Henson ?"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
"""
tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
return tokenizer
@@ -63,26 +63,26 @@ def transformerXLModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
# Prepare tokenized input
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
tokenized_text_1 = tokenizer.tokenize(text_1)
tokenized_text_2 = tokenizer.tokenize(text_2)
indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load transformerXLModel
model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
model.eval()
# Predict hidden states features for each layer
# We can re-use the memory cells in a subsequent call to attend to a longer context
with torch.no_grad():
    hidden_states_1, mems_1 = model(tokens_tensor_1)
    hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
"""
@@ -98,33 +98,33 @@ def transformerXLLMHeadModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
# Prepare tokenized input
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
tokenized_text_1 = tokenizer.tokenize(text_1)
tokenized_text_2 = tokenizer.tokenize(text_2)
indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load transformerXLLMHeadModel
model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
model.eval()
# Predict language modeling logits
# We can re-use the memory cells in a subsequent call to attend to a longer context
with torch.no_grad():
    predictions_1, mems_1 = model(tokens_tensor_1)
    predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
# Get the predicted last token
predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
assert predicted_token == 'who'
"""
model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
return model
@@ -17,16 +17,16 @@ xlm_start_docstring = """
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
# Prepare tokenized input
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
indexed_tokens_1 = tokenizer.encode(text_1)
indexed_tokens_2 = tokenizer.encode(text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
"""
# A lot of models share the same param doc. Use a decorator
@@ -76,11 +76,11 @@ def xlmTokenizer(*args, **kwargs):
Default: None
Example:
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
text = "Who was Jim Henson ?"
indexed_tokens = tokenizer.encode(text)
"""
tokenizer = XLMTokenizer.from_pretrained(*args, **kwargs)
return tokenizer
@@ -91,11 +91,11 @@ def xlmTokenizer(*args, **kwargs):
def xlmModel(*args, **kwargs):
"""
# Load xlmModel
model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
model.eval()
# Predict hidden states features for each layer
with torch.no_grad():
    hidden_states_1, mems = model(tokens_tensor_1)
    hidden_states_2, mems = model(tokens_tensor_2, past=mems)
"""
@@ -108,26 +108,26 @@ def xlmModel(*args, **kwargs):
def xlmLMHeadModel(*args, **kwargs):
"""
# Prepare tokenized input
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
indexed_tokens_1 = tokenizer.encode(text_1)
indexed_tokens_2 = tokenizer.encode(text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load xlmLMHeadModel
model = torch.hub.load('huggingface/pytorch-transformers', 'xlmLMHeadModel', 'xlm-mlm-en-2048')
model.eval()
# Predict language modeling logits
with torch.no_grad():
    predictions_1, mems = model(tokens_tensor_1)
    predictions_2, mems = model(tokens_tensor_2, mems=mems)
# Get the predicted last token
predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
predicted_token = tokenizer.decode([predicted_index])
assert predicted_token == ' who'
"""
model = XLMWithLMHeadModel.from_pretrained(*args, **kwargs)
return model
@@ -142,25 +142,25 @@ def xlmLMHeadModel(*args, **kwargs):
# Example:
# # Load the tokenizer
# import torch
# tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
# # Prepare tokenized input
# text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
# text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
# tokenized_text1 = tokenizer.tokenize(text1)
# tokenized_text2 = tokenizer.tokenize(text2)
# indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
# indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
# tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
# mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# # Load xlmForSequenceClassification
# model = torch.hub.load('huggingface/pytorch-transformers', 'xlmForSequenceClassification', 'xlm-mlm-en-2048')
# model.eval()
# # Predict sequence classes logits
# with torch.no_grad():
#     lm_logits, mems = model(tokens_tensor)
# """
# model = XLMForSequenceClassification.from_pretrained(*args, **kwargs)
@@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs):
Default: None
Example:
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
text = "Who was Jim Henson ?"
indexed_tokens = tokenizer.encode(text)
"""
tokenizer = XLNetTokenizer.from_pretrained(*args, **kwargs)
return tokenizer
@@ -72,23 +72,23 @@ def xlnetModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
# Prepare tokenized input
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
indexed_tokens_1 = tokenizer.encode(text_1)
indexed_tokens_2 = tokenizer.encode(text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load xlnetModel
model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
model.eval()
# Predict hidden states features for each layer
with torch.no_grad():
    hidden_states_1, mems = model(tokens_tensor_1)
    hidden_states_2, mems = model(tokens_tensor_2, mems=mems)
"""
@@ -106,30 +106,30 @@ def xlnetLMHeadModel(*args, **kwargs):
Example:
# Load the tokenizer
import torch
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
# Prepare tokenized input
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
indexed_tokens_1 = tokenizer.encode(text_1)
indexed_tokens_2 = tokenizer.encode(text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load xlnetLMHeadModel
model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
model.eval()
# Predict language modeling logits
with torch.no_grad():
    predictions_1, mems = model(tokens_tensor_1)
    predictions_2, mems = model(tokens_tensor_2, mems=mems)
# Get the predicted last token
predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
predicted_token = tokenizer.decode([predicted_index])
assert predicted_token == ' who'
"""
model = XLNetLMHeadModel.from_pretrained(*args, **kwargs)
return model
@@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs):
# Example:
# # Load the tokenizer
# import torch
# tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
# # Prepare tokenized input
# text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
# text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
# tokenized_text1 = tokenizer.tokenize(text1)
# tokenized_text2 = tokenizer.tokenize(text2)
# indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
# indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
# tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
# mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# # Load xlnetForSequenceClassification
# model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
# model.eval()
# # Predict sequence classes logits
# with torch.no_grad():
#     lm_logits, mems = model(tokens_tensor)
# """
# model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
@@ -89,15 +89,15 @@ class AutoConfig(object):
Examples::
config = AutoConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
config = AutoConfig.from_pretrained('./test/bert_saved_model/')  # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json')
config = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True, foo=False)
assert config.output_attention == True
config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True,
                                                   foo=False, return_unused_kwargs=True)
assert config.output_attention == True
assert unused_kwargs == {'foo': False}
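# A configuration can be written back to disk and reloaded the same way (a sketch,
# reusing the `./test/saved_model/` directory mentioned above):
config.save_pretrained('./test/saved_model/')
config = AutoConfig.from_pretrained('./test/saved_model/')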
""" """
if 'bert' in pretrained_model_name_or_path: if 'bert' in pretrained_model_name_or_path:
...@@ -202,13 +202,13 @@ class AutoModel(object): ...@@ -202,13 +202,13 @@ class AutoModel(object):
Examples::
model = AutoModel.from_pretrained('bert-base-uncased')  # Download model and configuration from S3 and cache.
model = AutoModel.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
model = AutoModel.from_pretrained('bert-base-uncased', output_attention=True)  # Update configuration during loading
assert model.config.output_attention == True
# Loading from a TF checkpoint file instead of a PyTorch model (slower)
config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
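# Models follow the same save/reload pattern as configurations (a sketch):
model.save_pretrained('./test/saved_model/')
model = AutoModel.from_pretrained('./test/saved_model/')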
""" """
if 'bert' in pretrained_model_name_or_path: if 'bert' in pretrained_model_name_or_path:
......
...@@ -643,12 +643,12 @@ class BertModel(BertPreTrainedModel): ...@@ -643,12 +643,12 @@ class BertModel(BertPreTrainedModel):
Examples::
config = BertConfig.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel(config)
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
outputs = model(input_ids)
last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
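# last_hidden_states has shape (batch_size, sequence_length, hidden_size);
# a quick consistency check against the configuration (a sketch):
assert last_hidden_states.shape[-1] == config.hidden_size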
""" """
def __init__(self, config): def __init__(self, config):
@@ -754,13 +754,13 @@ class BertForPreTraining(BertPreTrainedModel):
Examples::

    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    model = BertForPreTraining(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    prediction_scores, seq_relationship_scores = outputs[:2]
"""
def __init__(self, config):
@@ -824,13 +824,13 @@ class BertForMaskedLM(BertPreTrainedModel):
Examples::

    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    model = BertForMaskedLM(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, masked_lm_labels=input_ids)
    loss, prediction_scores = outputs[:2]
"""
def __init__(self, config):
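A sketch of reading a prediction back out of `prediction_scores`, assuming pretrained weights (e.g. `BertForMaskedLM.from_pretrained('bert-base-uncased')`) rather than a freshly initialized config, so the argmax is meaningful::

    import torch

    text = "[CLS] Hello, my dog is [MASK] [SEP]"
    tokens = tokenizer.tokenize(text)                        # special tokens like [MASK] are kept intact
    input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
    masked_index = tokens.index('[MASK]')

    with torch.no_grad():
        prediction_scores = model(input_ids)[0]              # (1, seq_len, vocab_size)
    predicted_id = prediction_scores[0, masked_index].argmax().item()
    print(tokenizer.convert_ids_to_tokens([predicted_id])[0])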
@@ -891,13 +891,13 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
Examples::

    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    model = BertForNextSentencePrediction(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    seq_relationship_scores = outputs[0]
"""
def __init__(self, config):
@@ -951,14 +951,14 @@ class BertForSequenceClassification(BertPreTrainedModel):
Examples::

    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    model = BertForSequenceClassification(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=labels)
    loss, logits = outputs[:2]
"""
def __init__(self, config):
@@ -1057,15 +1057,15 @@ class BertForMultipleChoice(BertPreTrainedModel):
Examples::

    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    model = BertForMultipleChoice(config)
    choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
    input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
    labels = torch.tensor(1).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=labels)
    loss, classification_scores = outputs[:2]
"""
def __init__(self, config):
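Since `classification_scores` has shape (batch_size, num_choices), picking the model's preferred choice is a one-line argmax. A short sketch; note that `torch.tensor([tokenizer.encode(s) for s in choices])` only works here because both choices tokenize to the same length, so real inputs would need padding::

    import torch.nn.functional as F

    probs = F.softmax(classification_scores, dim=-1)  # (1, 2): one probability per choice
    best_choice = probs.argmax(dim=-1).item()
    print(choices[best_choice])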
@@ -1127,14 +1127,14 @@ class BertForTokenClassification(BertPreTrainedModel):
Examples::

    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    model = BertForTokenClassification(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=labels)
    loss, scores = outputs[:2]
"""
def __init__(self, config):
@@ -1203,15 +1203,15 @@ class BertForQuestionAnswering(BertPreTrainedModel):
Examples::

    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    model = BertForQuestionAnswering(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    start_positions = torch.tensor([1])
    end_positions = torch.tensor([3])
    outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
    loss, start_scores, end_scores = outputs[:3]
"""
def __init__(self, config):
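A follow-up sketch showing how `start_scores` and `end_scores` map back to an answer span at inference time; calling the model without `start_positions`/`end_positions`, so no loss is returned::

    import torch

    with torch.no_grad():
        start_scores, end_scores = model(input_ids)[:2]

    start = start_scores.argmax(dim=-1).item()
    end = end_scores.argmax(dim=-1).item()
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
    print(' '.join(tokens[start:end + 1]))  # answer span, assuming start <= end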
...
@@ -433,12 +433,12 @@ class GPT2Model(GPT2PreTrainedModel):
Examples::

    config = GPT2Config.from_pretrained('gpt2')
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2Model(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
"""
def __init__(self, config):
@@ -567,12 +567,12 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
Examples::

    config = GPT2Config.from_pretrained('gpt2')
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=input_ids)
    loss, logits = outputs[:2]
"""
def __init__(self, config):
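The LM head makes greedy decoding straightforward; a minimal sketch, assuming pretrained weights (`GPT2LMHeadModel.from_pretrained('gpt2')`) so the continuations are sensible::

    import torch

    generated = tokenizer.encode("The Manhattan bridge")
    model.eval()
    with torch.no_grad():
        for _ in range(10):  # generate 10 tokens greedily
            input_ids = torch.tensor(generated).unsqueeze(0)
            logits = model(input_ids)[0]             # (1, seq_len, vocab_size); no labels, so logits come first
            generated.append(logits[0, -1].argmax().item())
    print(tokenizer.decode(generated))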
@@ -683,14 +683,14 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
Examples::

    config = GPT2Config.from_pretrained('gpt2')
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2DoubleHeadsModel(config)
    choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]  # Assume you've added [CLS] to the vocabulary
    input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
    mc_token_ids = torch.tensor([input_ids.size(-1) - 1, input_ids.size(-1) - 1]).unsqueeze(0)  # Positions of the [CLS] tokens (gather does not accept negative indices)
    outputs = model(input_ids, mc_token_ids)
    lm_prediction_scores, mc_prediction_scores = outputs[:2]
"""
def __init__(self, config):
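On the "Assume you've added [CLS] to the vocabulary" comment: a sketch of one way to do that, assuming this library version provides the `add_special_tokens` and `resize_token_embeddings` helpers::

    num_added = tokenizer.add_special_tokens({'cls_token': '[CLS]'})  # extend the vocabulary
    model.resize_token_embeddings(len(tokenizer))                     # grow the embedding matrix to match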
...
@@ -439,12 +439,12 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
Examples::

    config = OpenAIGPTConfig.from_pretrained('openai-gpt')
    tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
    model = OpenAIGPTModel(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
"""
def __init__(self, config):
@@ -558,12 +558,12 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
Examples::

    config = OpenAIGPTConfig.from_pretrained('openai-gpt')
    tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
    model = OpenAIGPTLMHeadModel(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=input_ids)
    loss, logits = outputs[:2]
"""
def __init__(self, config):
@@ -665,14 +665,14 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
Examples::

    config = OpenAIGPTConfig.from_pretrained('openai-gpt')
    tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
    model = OpenAIGPTDoubleHeadsModel(config)
    choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]  # Assume you've added [CLS] to the vocabulary
    input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
    mc_token_ids = torch.tensor([input_ids.size(-1) - 1, input_ids.size(-1) - 1]).unsqueeze(0)  # Positions of the [CLS] tokens (gather does not accept negative indices)
    outputs = model(input_ids, mc_token_ids)
    lm_prediction_scores, mc_prediction_scores = outputs[:2]
"""
def __init__(self, config):
...
@@ -968,12 +968,12 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
Examples::

    config = TransfoXLConfig.from_pretrained('transfo-xl-wt103')
    tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
    model = TransfoXLModel(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    last_hidden_states, mems = outputs[:2]
"""
def __init__(self, config):
@@ -1284,12 +1284,12 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
Examples::

    config = TransfoXLConfig.from_pretrained('transfo-xl-wt103')
    tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
    model = TransfoXLLMHeadModel(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    prediction_scores, mems = outputs[:2]
"""
def __init__(self, config):
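Transfo-XL's second return value, `mems`, caches hidden states so a following segment can attend beyond its own length. A sketch of carrying them across two segments; the `mems` keyword is an assumption based on this version's forward signature::

    import torch

    ids_1 = torch.tensor(tokenizer.encode("Hello, my dog is cute.")).unsqueeze(0)
    ids_2 = torch.tensor(tokenizer.encode("He is really cute indeed.")).unsqueeze(0)
    with torch.no_grad():
        prediction_scores, mems = model(ids_1)[:2]
        prediction_scores, mems = model(ids_2, mems=mems)[:2]  # segment 2 attends to cached segment 1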
...
@@ -105,15 +105,15 @@ class PretrainedConfig(object):
Examples::

    config = BertConfig.from_pretrained('bert-base-uncased')    # Download configuration from S3 and cache.
    config = BertConfig.from_pretrained('./test/saved_model/')  # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
    config = BertConfig.from_pretrained('./test/saved_model/my_configuration.json')
    config = BertConfig.from_pretrained('bert-base-uncased', output_attention=True, foo=False)
    assert config.output_attention == True
    config, unused_kwargs = BertConfig.from_pretrained('bert-base-uncased', output_attention=True,
        foo=False, return_unused_kwargs=True)
    assert config.output_attention == True
    assert unused_kwargs == {'foo': False}
"""
cache_dir = kwargs.pop('cache_dir', None)
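A round-trip sketch complementing the loading examples above, using the matching `save_pretrained` method::

    config = BertConfig.from_pretrained('bert-base-uncased')
    config.save_pretrained('./test/saved_model/')             # the directory must already exist; writes config.json into it
    reloaded = BertConfig.from_pretrained('./test/saved_model/')
    assert config.to_dict() == reloaded.to_dict()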
@@ -369,13 +369,13 @@ class PreTrainedModel(nn.Module):
Examples::

    model = BertModel.from_pretrained('bert-base-uncased')    # Download model and configuration from S3 and cache.
    model = BertModel.from_pretrained('./test/saved_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
    model = BertModel.from_pretrained('bert-base-uncased', output_attention=True)  # Update configuration during loading
    assert model.config.output_attention == True
    # Loading from a TF checkpoint file instead of a PyTorch model (slower)
    config = BertConfig.from_json_file('./tf_model/my_tf_model_config.json')
    model = BertModel.from_pretrained('./tf_model/my_tf_checkpoint.ckpt.index', from_tf=True, config=config)
"""
config = kwargs.pop('config', None)
...
@@ -472,12 +472,12 @@ class XLMModel(XLMPreTrainedModel):
Examples::

    config = XLMConfig.from_pretrained('xlm-mlm-en-2048')
    tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
    model = XLMModel(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
"""
ATTRIBUTES = ['encoder', 'eos_index', 'pad_index',  # 'with_output',
@@ -745,12 +745,12 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
Examples::

    config = XLMConfig.from_pretrained('xlm-mlm-en-2048')
    tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
    model = XLMWithLMHeadModel(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    prediction_scores = outputs[0]  # For the LM-head model, the prediction scores are the first element of the output tuple
"""
def __init__(self, config):
@@ -805,14 +805,14 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
Examples::

    config = XLMConfig.from_pretrained('xlm-mlm-en-2048')
    tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')

    model = XLMForSequenceClassification(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=labels)
    loss, logits = outputs[:2]
"""
def __init__(self, config):
@@ -885,15 +885,15 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
Examples::

    config = XLMConfig.from_pretrained('xlm-mlm-en-2048')
    tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')

    model = XLMForQuestionAnswering(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    start_positions = torch.tensor([1])
    end_positions = torch.tensor([3])
    outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
    loss, start_scores, end_scores = outputs[:3]
"""
def __init__(self, config):
...
@@ -712,12 +712,12 @@ class XLNetModel(XLNetPreTrainedModel):
Examples::

    config = XLNetConfig.from_pretrained('xlnet-large-cased')
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
    model = XLNetModel(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
"""
def __init__(self, config):
@@ -1019,17 +1019,17 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
Examples::

    config = XLNetConfig.from_pretrained('xlnet-large-cased')
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
    model = XLNetLMHeadModel(config)
    # We show how to setup inputs to predict a next token using a bi-directional context.
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>")).unsqueeze(0)  # We will predict the masked token
    perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
    perm_mask[:, :, -1] = 1.0  # Previous tokens don't see the last token
    target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float)  # Shape [1, 1, seq_length] => let's predict one token
    target_mapping[0, 0, -1] = 1.0  # Our first (and only) prediction will be the last token of the sequence (the masked token)
    outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
    next_token_logits = outputs[0]  # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
"""
def __init__(self, config):
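To turn `next_token_logits` into an actual token, an argmax over the single target position suffices; a sketch, again assuming pretrained weights so the prediction is meaningful::

    predicted_id = next_token_logits[0, 0].argmax().item()  # logits for our one target position
    print(tokenizer.convert_ids_to_tokens([predicted_id])[0])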
@@ -1100,14 +1100,14 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
Examples::

    config = XLNetConfig.from_pretrained('xlnet-large-cased')
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')

    model = XLNetForSequenceClassification(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=labels)
    loss, logits = outputs[:2]
"""
def __init__(self, config):
@@ -1200,15 +1200,15 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
Examples::

    config = XLNetConfig.from_pretrained('xlnet-large-cased')
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')

    model = XLNetForQuestionAnswering(config)
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    start_positions = torch.tensor([1])
    end_positions = torch.tensor([3])
    outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
    loss, start_scores, end_scores = outputs[:3]
"""
def __init__(self, config):
...
@@ -78,8 +78,8 @@ class AutoTokenizer(object):
Examples::

    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')         # Download vocabulary from S3 and cache.
    tokenizer = AutoTokenizer.from_pretrained('./test/bert_saved_model/')  # E.g. tokenizer was saved using `save_pretrained('./test/saved_model/')`
"""
if 'bert' in pretrained_model_name_or_path:
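A small encode/decode round trip, assuming the `decode` helper of this version's base tokenizer class::

    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
    ids = tokenizer.encode("Hello, World!")
    print(tokenizer.decode(ids))  # back to text, modulo the tokenizer's normalization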
...