Commit d2cc6b10 authored by LysandreJik

Merge branch 'master' into RoBERTa

parents 39d72bcc 7729ef73
@@ -247,6 +247,9 @@ def evaluate(args, model, tokenizer, prefix=""):
 def load_and_cache_examples(args, task, tokenizer, evaluate=False):
+    if args.local_rank not in [-1, 0]:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset; the others will use the cache
     processor = processors[task]()
     output_mode = output_modes[task]
     # Load data features from cache or dataset file
@@ -273,6 +276,9 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
         logger.info("Saving features into cached file %s", cached_features_file)
         torch.save(features, cached_features_file)
+    if args.local_rank == 0:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset; the others will use the cache
     # Convert to Tensors and build dataset
     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
     all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
...
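The two barrier calls added in this hunk implement a simple rendezvous around the feature cache: every rank except 0 blocks at the first barrier while rank 0 builds and saves the features, then rank 0 reaches the second barrier and releases the others, which read the cache instead of re-tokenizing. A minimal sketch of the pattern, assuming torch.distributed is already initialized and local_rank == -1 denotes single-process runs; build_or_load_features is a hypothetical helper:

    import torch

    def cached_features(args):
        if args.local_rank not in [-1, 0]:
            torch.distributed.barrier()  # block until rank 0 has written the cache
        features = build_or_load_features(args)  # hypothetical: load the cache if present, else build and save it
        if args.local_rank == 0:
            torch.distributed.barrier()  # cache exists now; release the waiting ranks
        return features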
@@ -138,7 +138,7 @@ def train(args, train_dataset, model, tokenizer):
                       'end_positions': batch[4]}
             if args.model_type in ['xlnet', 'xlm']:
                 inputs.update({'cls_index': batch[5],
                                'p_mask': batch[6]})
             outputs = model(**inputs)
             loss = outputs[0]  # model outputs are always a tuple in pytorch-transformers (see doc)
@@ -272,6 +272,9 @@ def evaluate(args, model, tokenizer, prefix=""):
 def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
+    if args.local_rank not in [-1, 0]:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset; the others will use the cache
     # Load data features from cache or dataset file
     input_file = args.predict_file if evaluate else args.train_file
     cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format(
@@ -296,6 +299,9 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
         logger.info("Saving features into cached file %s", cached_features_file)
         torch.save(features, cached_features_file)
+    if args.local_rank == 0:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset; the others will use the cache
     # Convert to Tensors and build dataset
     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
     all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
...
@@ -205,7 +205,7 @@ def main():
     param_optimizer = list(model.named_parameters())
     no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
     optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
     ]
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
...
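This hunk stops hard-coding the 0.01 weight decay and reads it from the command line instead, while biases and LayerNorm parameters stay in the zero-decay group, since decaying those rarely helps. A sketch of the argparse flag the new code assumes; the default shown here is illustrative, not taken from this commit:

    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay applied to every parameter except biases and LayerNorm weights.")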
@@ -37,7 +37,7 @@ bert_docstring = """
                    checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models
                    will be cached.
-        state_dict: an optional state dictionnary
+        state_dict: an optional state dictionary
                     (collections.OrderedDict object) to use instead of Google
                     pre-trained models
         *inputs, **kwargs: additional input for the specific Bert class
@@ -84,12 +84,12 @@ def bertTokenizer(*args, **kwargs):
             Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
 Example:
->>> import torch
->>> sentence = 'Hello, World!'
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
->>> toks = tokenizer.tokenize(sentence)
+import torch
+sentence = 'Hello, World!'
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+toks = tokenizer.tokenize(sentence)
 ['Hello', '##,', 'World', '##!']
->>> ids = tokenizer.convert_tokens_to_ids(toks)
+ids = tokenizer.convert_tokens_to_ids(toks)
 [8667, 28136, 1291, 28125]
 """
 tokenizer = BertTokenizer.from_pretrained(*args, **kwargs)
@@ -105,20 +105,20 @@ def bertModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
 # Prepare tokenized input
->>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
->>> tokenized_text = tokenizer.tokenize(text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
->>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
->>> tokens_tensor = torch.tensor([indexed_tokens])
->>> segments_tensors = torch.tensor([segments_ids])
+text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+tokens_tensor = torch.tensor([indexed_tokens])
+segments_tensors = torch.tensor([segments_ids])
 # Load bertModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
+model.eval()
 # Predict hidden states features for each layer
->>> with torch.no_grad():
+with torch.no_grad():
     encoded_layers, _ = model(tokens_tensor, segments_tensors)
 """
 model = BertModel.from_pretrained(*args, **kwargs)
@@ -134,20 +134,20 @@ def bertForNextSentencePrediction(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
 # Prepare tokenized input
->>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
->>> tokenized_text = tokenizer.tokenize(text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
->>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
->>> tokens_tensor = torch.tensor([indexed_tokens])
->>> segments_tensors = torch.tensor([segments_ids])
+text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+tokens_tensor = torch.tensor([indexed_tokens])
+segments_tensors = torch.tensor([segments_ids])
 # Load bertForNextSentencePrediction
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
+model.eval()
 # Predict the next sentence classification logits
->>> with torch.no_grad():
+with torch.no_grad():
     next_sent_classif_logits = model(tokens_tensor, segments_tensors)
 """
 model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs)
@@ -164,17 +164,17 @@ def bertForPreTraining(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
 # Prepare tokenized input
->>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
->>> tokenized_text = tokenizer.tokenize(text)
->>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
->>> tokens_tensor = torch.tensor([indexed_tokens])
->>> segments_tensors = torch.tensor([segments_ids])
+text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+tokens_tensor = torch.tensor([indexed_tokens])
+segments_tensors = torch.tensor([segments_ids])
 # Load bertForPreTraining
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
->>> masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
+model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
+masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
 """
 model = BertForPreTraining.from_pretrained(*args, **kwargs)
 return model
@@ -188,25 +188,25 @@ def bertForMaskedLM(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
 # Prepare tokenized input
->>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
->>> tokenized_text = tokenizer.tokenize(text)
->>> masked_index = 8
->>> tokenized_text[masked_index] = '[MASK]'
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
->>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
->>> tokens_tensor = torch.tensor([indexed_tokens])
->>> segments_tensors = torch.tensor([segments_ids])
+text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+tokenized_text = tokenizer.tokenize(text)
+masked_index = 8
+tokenized_text[masked_index] = '[MASK]'
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+tokens_tensor = torch.tensor([indexed_tokens])
+segments_tensors = torch.tensor([segments_ids])
 # Load bertForMaskedLM
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
+model.eval()
 # Predict all tokens
->>> with torch.no_grad():
+with torch.no_grad():
     predictions = model(tokens_tensor, segments_tensors)
->>> predicted_index = torch.argmax(predictions[0, masked_index]).item()
->>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+predicted_index = torch.argmax(predictions[0, masked_index]).item()
+predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
 'henson'
 """
 model = BertForMaskedLM.from_pretrained(*args, **kwargs)
@@ -230,24 +230,24 @@ def bertForSequenceClassification(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
 # Prepare tokenized input
->>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
->>> tokenized_text = tokenizer.tokenize(text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
->>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
->>> tokens_tensor = torch.tensor([indexed_tokens])
->>> segments_tensors = torch.tensor([segments_ids])
+text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+tokens_tensor = torch.tensor([indexed_tokens])
+segments_tensors = torch.tensor([segments_ids])
 # Load bertForSequenceClassification
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
+model.eval()
 # Predict the sequence classification logits
->>> with torch.no_grad():
+with torch.no_grad():
     seq_classif_logits = model(tokens_tensor, segments_tensors)
 # Or get the sequence classification loss
->>> labels = torch.tensor([1])
->>> seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
+labels = torch.tensor([1])
+seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
 """
 model = BertForSequenceClassification.from_pretrained(*args, **kwargs)
 return model
@@ -265,24 +265,24 @@ def bertForMultipleChoice(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
 # Prepare tokenized input
->>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
->>> tokenized_text = tokenizer.tokenize(text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
->>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
->>> tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
->>> segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
+text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
+segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
 # Load bertForMultipleChoice
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
+model.eval()
 # Predict the multiple choice logits
->>> with torch.no_grad():
+with torch.no_grad():
     multiple_choice_logits = model(tokens_tensor, segments_tensors)
 # Or get the multiple choice loss
->>> labels = torch.tensor([1])
->>> multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
+labels = torch.tensor([1])
+multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
 """
 model = BertForMultipleChoice.from_pretrained(*args, **kwargs)
 return model
@@ -298,25 +298,25 @@ def bertForQuestionAnswering(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
 # Prepare tokenized input
->>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
->>> tokenized_text = tokenizer.tokenize(text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
->>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
->>> tokens_tensor = torch.tensor([indexed_tokens])
->>> segments_tensors = torch.tensor([segments_ids])
+text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+tokens_tensor = torch.tensor([indexed_tokens])
+segments_tensors = torch.tensor([segments_ids])
 # Load bertForQuestionAnswering
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
+model.eval()
 # Predict the start and end positions logits
->>> with torch.no_grad():
+with torch.no_grad():
     start_logits, end_logits = model(tokens_tensor, segments_tensors)
 # Or get the total loss, which is the sum of the CrossEntropy loss for the start and end token positions
->>> multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
+start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
 # set model.train() before if training this loss
+total_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
 """
 model = BertForQuestionAnswering.from_pretrained(*args, **kwargs)
 return model
@@ -337,24 +337,24 @@ def bertForTokenClassification(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
 # Prepare tokenized input
->>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
->>> tokenized_text = tokenizer.tokenize(text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
->>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
->>> tokens_tensor = torch.tensor([indexed_tokens])
->>> segments_tensors = torch.tensor([segments_ids])
+text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+tokens_tensor = torch.tensor([indexed_tokens])
+segments_tensors = torch.tensor([segments_ids])
 # Load bertForTokenClassification
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
+model.eval()
 # Predict the token classification logits
->>> with torch.no_grad():
+with torch.no_grad():
     classif_logits = model(tokens_tensor, segments_tensors)
 # Or get the token classification loss
->>> labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
->>> classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
+labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
+classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
 """
 model = BertForTokenClassification.from_pretrained(*args, **kwargs)
 return model
@@ -52,11 +52,11 @@ def gpt2Tokenizer(*args, **kwargs):
             Default: None
 Example:
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
->>> text = "Who was Jim Henson ?"
->>> indexed_tokens = tokenizer.encode(tokenized_text)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+text = "Who was Jim Henson ?"
+indexed_tokens = tokenizer.encode(text)
 """
 tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
 return tokenizer
@@ -71,24 +71,24 @@ def gpt2Model(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
 # Prepare tokenized input
->>> text_1 = "Who was Jim Henson ?"
->>> text_2 = "Jim Henson was a puppeteer"
->>> indexed_tokens_1 = tokenizer.encode(text_1)
->>> indexed_tokens_2 = tokenizer.encode(text_2)
->>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
->>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+text_1 = "Who was Jim Henson ?"
+text_2 = "Jim Henson was a puppeteer"
+indexed_tokens_1 = tokenizer.encode(text_1)
+indexed_tokens_2 = tokenizer.encode(text_2)
+tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 # Load gpt2Model
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
+model.eval()
 # Predict hidden states features for each layer
 # past can be used to reuse precomputed hidden states in subsequent predictions
->>> with torch.no_grad():
+with torch.no_grad():
     hidden_states_1, past = model(tokens_tensor_1)
     hidden_states_2, past = model(tokens_tensor_2, past=past)
 """
@@ -104,31 +104,31 @@ def gpt2LMHeadModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
 # Prepare tokenized input
->>> text_1 = "Who was Jim Henson ?"
->>> text_2 = "Jim Henson was a puppeteer"
->>> indexed_tokens_1 = tokenizer.encode(text_1)
->>> indexed_tokens_2 = tokenizer.encode(text_2)
->>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
->>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+text_1 = "Who was Jim Henson ?"
+text_2 = "Jim Henson was a puppeteer"
+indexed_tokens_1 = tokenizer.encode(text_1)
+indexed_tokens_2 = tokenizer.encode(text_2)
+tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 # Load gpt2LMHeadModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
+model.eval()
 # Predict hidden states features for each layer
 # past can be used to reuse precomputed hidden states in subsequent predictions
->>> with torch.no_grad():
+with torch.no_grad():
     predictions_1, past = model(tokens_tensor_1)
     predictions_2, past = model(tokens_tensor_2, past=past)
 # Get the predicted last token
->>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
->>> predicted_token = tokenizer.decode([predicted_index])
->>> assert predicted_token == ' who'
+predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+predicted_token = tokenizer.decode([predicted_index])
+assert predicted_token == ' who'
 """
 model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
 return model
@@ -143,25 +143,25 @@ def gpt2DoubleHeadsModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
 # Prepare tokenized input
->>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
->>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
->>> tokenized_text1 = tokenizer.tokenize(text1)
->>> tokenized_text2 = tokenizer.tokenize(text2)
->>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
->>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
->>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
->>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+tokenized_text1 = tokenizer.tokenize(text1)
+tokenized_text2 = tokenizer.tokenize(text2)
+indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 # Load gpt2DoubleHeadsModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
+model.eval()
 # Predict hidden states features for each layer
->>> with torch.no_grad():
+with torch.no_grad():
     lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids)
 """
 model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs)
...
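One caveat with the gpt2DoubleHeadsModel example above: torch.tensor rejects ragged nested lists, and the two candidate continuations usually tokenize to different lengths, so in practice they need padding before being stacked. A hedged sketch of that step; the pad id 0 is an assumption for illustration:

    # Pad the shorter choice so both rows have equal length before stacking.
    max_len = max(len(indexed_tokens1), len(indexed_tokens2))
    choices = [ids + [0] * (max_len - len(ids)) for ids in (indexed_tokens1, indexed_tokens2)]
    tokens_tensor = torch.tensor([choices])  # shape: (batch=1, num_choices=2, max_len)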
@@ -40,7 +40,7 @@ gpt_docstring = """
         . a series of NumPy files containing OpenAI TensorFlow trained weights
     from_tf: should we load the weights from a locally saved TensorFlow checkpoint
     cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-    state_dict: an optional state dictionnary (collections.OrderedDict object)
+    state_dict: an optional state dictionary (collections.OrderedDict object)
                 to use instead of pre-trained models
     *inputs, **kwargs: additional input for the specific OpenAI-GPT class
 """
@@ -76,12 +76,12 @@ def openAIGPTTokenizer(*args, **kwargs):
             Default: None
 Example:
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
->>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
->>> tokenized_text = tokenizer.tokenize(text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
 [763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483]
 """
 tokenizer = OpenAIGPTTokenizer.from_pretrained(*args, **kwargs)
@@ -97,21 +97,21 @@ def openAIGPTModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
 # Prepare tokenized input
->>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
->>> tokenized_text = tokenizer.tokenize(text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
->>> tokens_tensor = torch.tensor([indexed_tokens])
+text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+tokens_tensor = torch.tensor([indexed_tokens])
 # Load openAIGPTModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
+model.eval()
 # Predict hidden states features for each layer
->>> with torch.no_grad():
+with torch.no_grad():
     hidden_states = model(tokens_tensor)
 """
 model = OpenAIGPTModel.from_pretrained(*args, **kwargs)
@@ -126,26 +126,26 @@ def openAIGPTLMHeadModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
 # Prepare tokenized input
->>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
->>> tokenized_text = tokenizer.tokenize(text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
->>> tokens_tensor = torch.tensor([indexed_tokens])
+text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+tokens_tensor = torch.tensor([indexed_tokens])
 # Load openAIGPTLMHeadModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
+model.eval()
 # Predict hidden states features for each layer
->>> with torch.no_grad():
+with torch.no_grad():
     predictions = model(tokens_tensor)
 # Get the predicted last token
->>> predicted_index = torch.argmax(predictions[0, -1, :]).item()
->>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+predicted_index = torch.argmax(predictions[0, -1, :]).item()
+predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
 '.</w>'
 """
 model = OpenAIGPTLMHeadModel.from_pretrained(*args, **kwargs)
@@ -161,25 +161,25 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
 # Prepare tokenized input
->>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
->>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
->>> tokenized_text1 = tokenizer.tokenize(text1)
->>> tokenized_text2 = tokenizer.tokenize(text2)
->>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
->>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
->>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
->>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+tokenized_text1 = tokenizer.tokenize(text1)
+tokenized_text2 = tokenizer.tokenize(text2)
+indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 # Load openAIGPTDoubleHeadsModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
+model.eval()
 # Predict hidden states features for each layer
->>> with torch.no_grad():
+with torch.no_grad():
     lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids)
 """
 model = OpenAIGPTDoubleHeadsModel.from_pretrained(*args, **kwargs)
...
@@ -23,7 +23,7 @@ transformer_xl_docstring = """
         . `model.chkpt` a TensorFlow checkpoint
     from_tf: should we load the weights from a locally saved TensorFlow checkpoint
     cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-    state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
+    state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
     *inputs, **kwargs: additional input for the specific TransformerXL class
 """
@@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs):
         * transfo-xl-wt103
 Example:
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
->>> text = "Who was Jim Henson ?"
->>> tokenized_text = tokenizer.tokenize(tokenized_text)
->>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+text = "Who was Jim Henson ?"
+tokenized_text = tokenizer.tokenize(text)
+indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
 """
 tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
 return tokenizer
@@ -63,26 +63,26 @@ def transformerXLModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
 # Prepare tokenized input
->>> text_1 = "Who was Jim Henson ?"
->>> text_2 = "Jim Henson was a puppeteer"
->>> tokenized_text_1 = tokenizer.tokenize(text_1)
->>> tokenized_text_2 = tokenizer.tokenize(text_2)
->>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
->>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
->>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
->>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+text_1 = "Who was Jim Henson ?"
+text_2 = "Jim Henson was a puppeteer"
+tokenized_text_1 = tokenizer.tokenize(text_1)
+tokenized_text_2 = tokenizer.tokenize(text_2)
+indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 # Load transformerXLModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
+model.eval()
 # Predict hidden states features for each layer
 # We can re-use the memory cells in a subsequent call to attend a longer context
->>> with torch.no_grad():
+with torch.no_grad():
     hidden_states_1, mems_1 = model(tokens_tensor_1)
     hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
 """
@@ -98,33 +98,33 @@ def transformerXLLMHeadModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
 # Prepare tokenized input
->>> text_1 = "Who was Jim Henson ?"
->>> text_2 = "Jim Henson was a puppeteer"
->>> tokenized_text_1 = tokenizer.tokenize(text_1)
->>> tokenized_text_2 = tokenizer.tokenize(text_2)
->>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
->>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
->>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
->>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+text_1 = "Who was Jim Henson ?"
+text_2 = "Jim Henson was a puppeteer"
+tokenized_text_1 = tokenizer.tokenize(text_1)
+tokenized_text_2 = tokenizer.tokenize(text_2)
+indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 # Load transformerXLLMHeadModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
+model.eval()
 # Predict hidden states features for each layer
 # We can re-use the memory cells in a subsequent call to attend a longer context
->>> with torch.no_grad():
+with torch.no_grad():
     predictions_1, mems_1 = model(tokens_tensor_1)
     predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
 # Get the predicted last token
->>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
->>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
->>> assert predicted_token == 'who'
+predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+assert predicted_token == 'who'
 """
 model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
 return model
@@ -17,16 +17,16 @@ xlm_start_docstring = """
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
 # Prepare tokenized input
->>> text_1 = "Who was Jim Henson ?"
->>> text_2 = "Jim Henson was a puppeteer"
->>> indexed_tokens_1 = tokenizer.encode(text_1)
->>> indexed_tokens_2 = tokenizer.encode(text_2)
->>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
->>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+text_1 = "Who was Jim Henson ?"
+text_2 = "Jim Henson was a puppeteer"
+indexed_tokens_1 = tokenizer.encode(text_1)
+indexed_tokens_2 = tokenizer.encode(text_2)
+tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 """
 # A lot of models share the same param doc. Use a decorator
@@ -76,11 +76,11 @@ def xlmTokenizer(*args, **kwargs):
             Default: None
 Example:
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
->>> text = "Who was Jim Henson ?"
->>> indexed_tokens = tokenizer.encode(tokenized_text)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+text = "Who was Jim Henson ?"
+indexed_tokens = tokenizer.encode(text)
 """
 tokenizer = XLMTokenizer.from_pretrained(*args, **kwargs)
 return tokenizer
@@ -91,11 +91,11 @@ def xlmTokenizer(*args, **kwargs):
 def xlmModel(*args, **kwargs):
     """
 # Load xlmModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
+model.eval()
 # Predict hidden states features for each layer
->>> with torch.no_grad():
+with torch.no_grad():
     hidden_states_1, mems = model(tokens_tensor_1)
     hidden_states_2, mems = model(tokens_tensor_2, past=mems)
 """
@@ -108,26 +108,26 @@ def xlmModel(*args, **kwargs):
 def xlmLMHeadModel(*args, **kwargs):
     """
 # Prepare tokenized input
->>> text_1 = "Who was Jim Henson ?"
->>> text_2 = "Jim Henson was a puppeteer"
->>> indexed_tokens_1 = tokenizer.encode(text_1)
->>> indexed_tokens_2 = tokenizer.encode(text_2)
->>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
->>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+text_1 = "Who was Jim Henson ?"
+text_2 = "Jim Henson was a puppeteer"
+indexed_tokens_1 = tokenizer.encode(text_1)
+indexed_tokens_2 = tokenizer.encode(text_2)
+tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 # Load xlmLMHeadModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'xlmLMHeadModel', 'xlm-mlm-en-2048')
+model.eval()
 # Predict hidden states features for each layer
->>> with torch.no_grad():
+with torch.no_grad():
     predictions_1, mems = model(tokens_tensor_1)
     predictions_2, mems = model(tokens_tensor_2, mems=mems)
 # Get the predicted last token
->>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
->>> predicted_token = tokenizer.decode([predicted_index])
->>> assert predicted_token == ' who'
+predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+predicted_token = tokenizer.decode([predicted_index])
+assert predicted_token == ' who'
 """
 model = XLMWithLMHeadModel.from_pretrained(*args, **kwargs)
 return model
@@ -142,25 +142,25 @@ def xlmLMHeadModel(*args, **kwargs):
 # Example:
 # # Load the tokenizer
-# >>> import torch
-# >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+# import torch
+# tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
 # # Prepare tokenized input
-# >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-# >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-# >>> tokenized_text1 = tokenizer.tokenize(text1)
-# >>> tokenized_text2 = tokenizer.tokenize(text2)
-# >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-# >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-# >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-# >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+# text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+# text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+# tokenized_text1 = tokenizer.tokenize(text1)
+# tokenized_text2 = tokenizer.tokenize(text2)
+# indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+# indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+# tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+# mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 # # Load xlmForSequenceClassification
-# >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmForSequenceClassification', 'xlm-mlm-en-2048')
-# >>> model.eval()
+# model = torch.hub.load('huggingface/pytorch-transformers', 'xlmForSequenceClassification', 'xlm-mlm-en-2048')
+# model.eval()
 # # Predict sequence classes logits
-# >>> with torch.no_grad():
+# with torch.no_grad():
 #     lm_logits, mems = model(tokens_tensor)
 # """
 # model = XLMForSequenceClassification.from_pretrained(*args, **kwargs)
...
@@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs):
             Default: None
 Example:
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
->>> text = "Who was Jim Henson ?"
->>> indexed_tokens = tokenizer.encode(tokenized_text)
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+text = "Who was Jim Henson ?"
+indexed_tokens = tokenizer.encode(text)
 """
 tokenizer = XLNetTokenizer.from_pretrained(*args, **kwargs)
 return tokenizer
@@ -72,23 +72,23 @@ def xlnetModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
 # Prepare tokenized input
->>> text_1 = "Who was Jim Henson ?"
->>> text_2 = "Jim Henson was a puppeteer"
->>> indexed_tokens_1 = tokenizer.encode(text_1)
->>> indexed_tokens_2 = tokenizer.encode(text_2)
->>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
->>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+text_1 = "Who was Jim Henson ?"
+text_2 = "Jim Henson was a puppeteer"
+indexed_tokens_1 = tokenizer.encode(text_1)
+indexed_tokens_2 = tokenizer.encode(text_2)
+tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 # Load xlnetModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
+model.eval()
 # Predict hidden states features for each layer
->>> with torch.no_grad():
+with torch.no_grad():
     hidden_states_1, mems = model(tokens_tensor_1)
     hidden_states_2, mems = model(tokens_tensor_2, past=mems)
 """
@@ -106,30 +106,30 @@ def xlnetLMHeadModel(*args, **kwargs):
 Example:
 # Load the tokenizer
->>> import torch
->>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+import torch
+tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
 # Prepare tokenized input
->>> text_1 = "Who was Jim Henson ?"
->>> text_2 = "Jim Henson was a puppeteer"
->>> indexed_tokens_1 = tokenizer.encode(text_1)
->>> indexed_tokens_2 = tokenizer.encode(text_2)
->>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
->>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+text_1 = "Who was Jim Henson ?"
+text_2 = "Jim Henson was a puppeteer"
+indexed_tokens_1 = tokenizer.encode(text_1)
+indexed_tokens_2 = tokenizer.encode(text_2)
+tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 # Load xlnetLMHeadModel
->>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
->>> model.eval()
+model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
+model.eval()
 # Predict hidden states features for each layer
->>> with torch.no_grad():
+with torch.no_grad():
     predictions_1, mems = model(tokens_tensor_1)
     predictions_2, mems = model(tokens_tensor_2, mems=mems)
 # Get the predicted last token
->>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
->>> predicted_token = tokenizer.decode([predicted_index])
->>> assert predicted_token == ' who'
+predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+predicted_token = tokenizer.decode([predicted_index])
+assert predicted_token == ' who'
 """
 model = XLNetLMHeadModel.from_pretrained(*args, **kwargs)
 return model
@@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs):
# Example:
#     # Load the tokenizer
#     import torch
#     tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
#     # Prepare tokenized input
#     text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
#     text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
#     tokenized_text1 = tokenizer.tokenize(text1)
#     tokenized_text2 = tokenizer.tokenize(text2)
#     indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
#     indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
#     tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
#     mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
#     # Load xlnetForSequenceClassification
#     model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
#     model.eval()
#     # Predict sequence classes logits
#     with torch.no_grad():
#         lm_logits, mems = model(tokens_tensor)
# """
# model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
...
__version__ = "1.0.0"
from .tokenization_auto import AutoTokenizer
from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
@@ -8,6 +9,10 @@ from .tokenization_xlm import XLMTokenizer
from .tokenization_roberta import RobertaTokenizer
from .tokenization_utils import (PreTrainedTokenizer)
from .modeling_auto import (AutoConfig, AutoModel)
from .modeling_bert import (BertConfig, BertPreTrainedModel, BertModel, BertForPreTraining,
                            BertForMaskedLM, BertForNextSentencePrediction,
                            BertForSequenceClassification, BertForMultipleChoice,
@@ -42,4 +47,4 @@ from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule, WarmupCosineSchedule,
                           WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
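The newly exported `AutoTokenizer`, `AutoConfig` and `AutoModel` classes resolve a concrete implementation from the checkpoint name. A minimal usage sketch, assuming this branch of `pytorch_transformers` is installed ('bert-base-uncased' is only an illustrative checkpoint name):

import torch
from pytorch_transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')  # dispatches to BertTokenizer by pattern matching
model = AutoModel.from_pretrained('bert-base-uncased')          # dispatches to BertModel
input_ids = torch.tensor([tokenizer.encode("Hello, my dog is cute")])  # Batch size 1
with torch.no_grad():
    last_hidden_states = model(input_ids)[0]  # first element of the output tuple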
@@ -58,7 +58,7 @@ if __name__ == "__main__":
default = None,
type = str,
required = True,
help = "Path to the TensorFlow checkpoint.")
parser.add_argument("--pytorch_dump_folder_path",
default = None,
type = str,
...
@@ -58,7 +58,7 @@ if __name__ == "__main__":
default = None,
type = str,
required = True,
help = "Path to the TensorFlow checkpoint.")
parser.add_argument("--pytorch_dump_folder_path",
default = None,
type = str,
...
@@ -20,7 +20,7 @@ import argparse
import torch
import numpy as np
import tensorflow as tf
from pytorch_transformers.modeling import BertModel
def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:str):
@@ -41,7 +41,7 @@ def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:str):
    N BertForQuestionAnswering
    """
    tensors_to_transpose = (
        "dense.weight",
        "attention.self.query",
        "attention.self.key",
@@ -62,34 +62,34 @@ def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:str):
    if not os.path.isdir(ckpt_dir):
        os.makedirs(ckpt_dir)
    state_dict = model.state_dict()

    def to_tf_var_name(name:str):
        for patt, repl in iter(var_map):
            name = name.replace(patt, repl)
        return 'bert/{}'.format(name)

    def create_tf_var(tensor:np.ndarray, name:str, session:tf.Session):
        tf_dtype = tf.dtypes.as_dtype(tensor.dtype)
        tf_var = tf.get_variable(dtype=tf_dtype, shape=tensor.shape, name=name, initializer=tf.zeros_initializer())
        session.run(tf.variables_initializer([tf_var]))
        session.run(tf_var)
        return tf_var

    tf.reset_default_graph()
    with tf.Session() as session:
        for var_name in state_dict:
            tf_name = to_tf_var_name(var_name)
            torch_tensor = state_dict[var_name].numpy()
            if any([x in var_name for x in tensors_to_transpose]):
                torch_tensor = torch_tensor.T
            tf_var = create_tf_var(tensor=torch_tensor, name=tf_name, session=session)
            tf.keras.backend.set_value(tf_var, torch_tensor)
            tf_weight = session.run(tf_var)
            print("Successfully created {}: {}".format(tf_name, np.allclose(tf_weight, torch_tensor)))
        saver = tf.train.Saver(tf.trainable_variables())
        saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt"))

def main(raw_args=None):
...
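For orientation, a minimal sketch of driving this converter (the output directory below is illustrative, and a TF 1.x runtime with `tf.Session` is assumed):

from pytorch_transformers.modeling import BertModel  # import path used by this script

model = BertModel.from_pretrained('bert-base-uncased')  # PyTorch weights to export
convert_pytorch_checkpoint_to_tf(model=model,
                                 ckpt_dir='/tmp/bert_tf',  # illustrative output directory
                                 model_name='bert-base-uncased')
# writes /tmp/bert_tf/bert_base_uncased.ckpt, loadable from the original TF BERT code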
@@ -47,7 +47,7 @@ if __name__ == "__main__":
default = None,
type = str,
required = True,
help = "Path to the TensorFlow checkpoint.")
parser.add_argument("--bert_config_file",
default = None,
type = str,
...
@@ -24,11 +24,10 @@ from io import open
import torch
import pytorch_transformers.tokenization_transfo_xl as data_utils
from pytorch_transformers import CONFIG_NAME, WEIGHTS_NAME
from pytorch_transformers.modeling_transfo_xl import (TransfoXLConfig, TransfoXLLMHeadModel,
                                                      load_tf_weights_in_transfo_xl)
from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME, VOCAB_FILES_NAMES)
if sys.version_info[0] == 2:
...
@@ -79,7 +79,7 @@ if __name__ == "__main__":
default = None,
type = str,
required = True,
help = "Path to the TensorFlow checkpoint.")
parser.add_argument("--xlnet_config_file",
default = None,
type = str,
...
@@ -38,10 +38,13 @@ except ImportError:
try:
    from pathlib import Path
    PYTORCH_PRETRAINED_BERT_CACHE = Path(
        os.getenv('PYTORCH_TRANSFORMERS_CACHE', os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', default_cache_path)))
except (AttributeError, ImportError):
    PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_TRANSFORMERS_CACHE',
                                              os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
                                                        default_cache_path))

PYTORCH_TRANSFORMERS_CACHE = PYTORCH_PRETRAINED_BERT_CACHE  # Kept for backward compatibility
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
@@ -70,7 +73,7 @@ def filename_to_url(filename, cache_dir=None):
    Raise ``EnvironmentError`` if `filename` or its stored metadata do not exist.
    """
    if cache_dir is None:
        cache_dir = PYTORCH_TRANSFORMERS_CACHE
    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)
@@ -98,7 +101,7 @@ def cached_path(url_or_filename, cache_dir=None):
    make sure the file exists and then return the path.
    """
    if cache_dir is None:
        cache_dir = PYTORCH_TRANSFORMERS_CACHE
    if sys.version_info[0] == 3 and isinstance(url_or_filename, Path):
        url_or_filename = str(url_or_filename)
    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
@@ -187,7 +190,7 @@ def get_from_cache(url, cache_dir=None):
    If it's not there, download it. Then return the path to the cached file.
    """
    if cache_dir is None:
        cache_dir = PYTORCH_TRANSFORMERS_CACHE
    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)
    if sys.version_info[0] == 2 and not isinstance(cache_dir, str):
...
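Resolution order after this change: `PYTORCH_TRANSFORMERS_CACHE` wins, then the legacy `PYTORCH_PRETRAINED_BERT_CACHE`, then the default path. A quick sketch of redirecting the cache (the directory is illustrative; the variable must be set before the module is imported, since resolution happens at import time):

import os
os.environ['PYTORCH_TRANSFORMERS_CACHE'] = '/data/hf_cache'  # illustrative path, set before importing

from pytorch_transformers.file_utils import PYTORCH_TRANSFORMERS_CACHE, cached_path

print(PYTORCH_TRANSFORMERS_CACHE)  # -> /data/hf_cache
# cached_path() falls back to this directory whenever cache_dir is not passed explicitly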
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Auto Model class. """
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from torch.nn.parameter import Parameter
from .modeling_bert import BertConfig, BertModel
from .modeling_openai import OpenAIGPTConfig, OpenAIGPTModel
from .modeling_gpt2 import GPT2Config, GPT2Model
from .modeling_transfo_xl import TransfoXLConfig, TransfoXLModel
from .modeling_xlnet import XLNetConfig, XLNetModel
from .modeling_xlm import XLMConfig, XLMModel
from .modeling_utils import PreTrainedModel, SequenceSummary
logger = logging.getLogger(__name__)
class AutoConfig(object):
r""":class:`~pytorch_transformers.AutoConfig` is a generic configuration class
that will be instantiated as one of the configuration classes of the library
when created with the `AutoConfig.from_pretrained(pretrained_model_name_or_path)`
class method.
The `from_pretrained()` method takes care of returning the correct configuration class instance
using pattern matching on the `pretrained_model_name_or_path` string.
The configuration class to instantiate is selected as the first matching pattern
in the `pretrained_model_name_or_path` string (in the following order):
- contains `bert`: BertConfig (Bert model)
- contains `openai-gpt`: OpenAIGPTConfig (OpenAI GPT model)
- contains `gpt2`: GPT2Config (OpenAI GPT-2 model)
- contains `transfo-xl`: TransfoXLConfig (Transformer-XL model)
- contains `xlnet`: XLNetConfig (XLNet model)
- contains `xlm`: XLMConfig (XLM model)
This class cannot be instantiated using `__init__()` (it throws an error).
"""
def __init__(self):
raise EnvironmentError("AutoConfig is designed to be instantiated "
"using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.")
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r""" Instantiate a one of the configuration classes of the library
from a pre-trained model configuration.
The configuration class to instantiate is selected as the first matching pattern
in the `pretrained_model_name_or_path` string (in the following order):
- contains `bert`: BertConfig (Bert model)
- contains `openai-gpt`: OpenAIGPTConfig (OpenAI GPT model)
- contains `gpt2`: GPT2Config (OpenAI GPT-2 model)
- contains `transfo-xl`: TransfoXLConfig (Transformer-XL model)
- contains `xlnet`: XLNetConfig (XLNet model)
- contains `xlm`: XLMConfig (XLM model)
Params:
**pretrained_model_name_or_path**: either:
- a string with the `shortcut name` of a pre-trained model configuration to load from cache
or download and cache if not already stored in cache (e.g. 'bert-base-uncased').
- a path to a `directory` containing a configuration file saved
using the `save_pretrained(save_directory)` method.
- a path or url to a saved configuration `file`.
**cache_dir**: (`optional`) string:
Path to a directory in which a downloaded pre-trained model
configuration should be cached if the standard cache should not be used.
**return_unused_kwargs**: (`optional`) bool:
- If False, then this function returns just the final configuration object.
- If True, then this function returns a tuple `(config, unused_kwargs)` where `unused_kwargs`
is a dictionary consisting of the key/value pairs whose keys are not configuration attributes:
i.e. the part of kwargs which has not been used to update `config` and is otherwise ignored.
**kwargs**: (`optional`) dict:
Dictionary of key/value pairs with which to update the configuration object after loading.
- The values in kwargs of any keys which are configuration attributes will be used
to override the loaded values.
- Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
by the `return_unused_kwargs` keyword parameter.
Examples::
config = AutoConfig.from_pretrained('bert-base-uncased') # Download configuration from S3 and cache.
config = AutoConfig.from_pretrained('./test/bert_saved_model/') # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json')
config = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True, foo=False)
assert config.output_attention == True
config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True,
foo=False, return_unused_kwargs=True)
assert config.output_attention == True
assert unused_kwargs == {'foo': False}
"""
if 'bert' in pretrained_model_name_or_path:
return BertConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
elif 'openai-gpt' in pretrained_model_name_or_path:
return OpenAIGPTConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
elif 'gpt2' in pretrained_model_name_or_path:
return GPT2Config.from_pretrained(pretrained_model_name_or_path, **kwargs)
elif 'transfo-xl' in pretrained_model_name_or_path:
return TransfoXLConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
elif 'xlnet' in pretrained_model_name_or_path:
return XLNetConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
elif 'xlm' in pretrained_model_name_or_path:
return XLMConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
raise ValueError("Unrecognized model identifier in {}. Should contains one of "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'xlm'".format(pretrained_model_name_or_path))
class AutoModel(object):
r"""
:class:`~pytorch_transformers.AutoModel` is a generic model class
that will be instantiated as one of the base model classes of the library
when created with the `AutoModel.from_pretrained(pretrained_model_name_or_path)`
class method.
The `from_pretrained()` method takes care of returning the correct model class instance
using pattern matching on the `pretrained_model_name_or_path` string.
The base model class to instantiate is selected as the first matching pattern
in the `pretrained_model_name_or_path` string (in the following order):
- contains `bert`: BertModel (Bert model)
- contains `openai-gpt`: OpenAIGPTModel (OpenAI GPT model)
- contains `gpt2`: GPT2Model (OpenAI GPT-2 model)
- contains `transfo-xl`: TransfoXLModel (Transformer-XL model)
- contains `xlnet`: XLNetModel (XLNet model)
- contains `xlm`: XLMModel (XLM model)
This class cannot be instantiated using `__init__()` (it throws an error).
"""
def __init__(self):
raise EnvironmentError("AutoModel is designed to be instantiated "
"using the `AutoModel.from_pretrained(pretrained_model_name_or_path)` method.")
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
r""" Instantiate a one of the base model classes of the library
from a pre-trained model configuration.
The base model class to instantiate is selected as the first pattern matching
in the `pretrained_model_name_or_path` string (in the following order):
- contains `bert`: BertConfig (Bert model)
- contains `openai-gpt`: OpenAIGPTConfig (OpenAI GPT model)
- contains `gpt2`: GPT2Config (OpenAI GPT-2 model)
- contains `transfo-xl`: TransfoXLConfig (Transformer-XL model)
- contains `xlnet`: XLNetConfig (XLNet model)
- contains `xlm`: XLMConfig (XLM model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
To train the model, you should first set it back in training mode with `model.train()`.
Params:
**pretrained_model_name_or_path**: either:
- a string with the `shortcut name` of a pre-trained model to load from cache
or download and cache if not already stored in cache (e.g. 'bert-base-uncased').
- a path to a `directory` containing a configuration file saved
using the `save_pretrained(save_directory)` method.
- a path or url to a tensorflow index checkpoint `file` (e.g. `./tf_model/model.ckpt.index`).
In this case, ``from_tf`` should be set to True and a configuration object should be
provided as `config` argument. This loading option is slower than converting the TensorFlow
checkpoint in a PyTorch model using the provided conversion scripts and loading
the PyTorch model afterwards.
**model_args**: (`optional`) Sequence:
All remaining positional arguments will be passed to the underlying model's __init__ function
**config**: an optional configuration for the model to use instead of an automatically loaded configuration.
Configuration can be automatically loaded when:
- the model is a model provided by the library (loaded with a `shortcut name` of a pre-trained model), or
- the model was saved using the `save_pretrained(save_directory)` method (loaded by supplying the save directory).
**state_dict**: an optional state dictionary for the model to use instead of a state dictionary loaded
from the saved weights file.
This option can be used if you want to create a model from a pretrained configuration but load your own weights.
In this case though, you should check if using `save_pretrained(dir)` and `from_pretrained(save_directory)` is not
a simpler option.
**cache_dir**: (`optional`) string:
Path to a directory in which a downloaded pre-trained model
configuration should be cached if the standard cache should not be used.
**output_loading_info**: (`optional`) boolean:
Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
**kwargs**: (`optional`) dict:
Dictionary of key/value pairs with which to update the configuration object after loading.
Can be used to override selected configuration parameters. E.g. ``output_attention=True``.
- If a configuration is provided with `config`, **kwargs will be directly passed
to the underlying model's __init__ method.
- If a configuration is not provided, **kwargs will be first passed to the pretrained
model configuration class loading function (`PretrainedConfig.from_pretrained`).
Each key of **kwargs that corresponds to a configuration attribute
will be used to override said attribute with the supplied **kwargs value.
Remaining keys that do not correspond to any configuration attribute will
be passed to the underlying model's __init__ function.
Examples::
model = AutoModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache.
model = AutoModel.from_pretrained('./test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
model = AutoModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading
assert model.config.output_attention == True
# Loading from a TF checkpoint file instead of a PyTorch model (slower)
config = BertConfig.from_json_file('./tf_model/bert_tf_model_config.json')  # AutoConfig itself only exposes from_pretrained()
model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
"""
if 'bert' in pretrained_model_name_or_path:
return BertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
elif 'openai-gpt' in pretrained_model_name_or_path:
return OpenAIGPTModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
elif 'gpt2' in pretrained_model_name_or_path:
return GPT2Model.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
elif 'transfo-xl' in pretrained_model_name_or_path:
return TransfoXLModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
elif 'xlnet' in pretrained_model_name_or_path:
return XLNetModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
elif 'xlm' in pretrained_model_name_or_path:
return XLMModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
raise ValueError("Unrecognized model identifier in {}. Should contains one of "
"'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
"'xlm'".format(pretrained_model_name_or_path))
@@ -222,7 +222,7 @@ class BertConfig(PretrainedConfig):
try:
    from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
except (ImportError, AttributeError) as e:
    logger.info("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
    class BertLayerNorm(nn.Module):
        def __init__(self, hidden_size, eps=1e-12):
@@ -643,12 +643,11 @@ class BertModel(BertPreTrainedModel):
Examples::
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
"""
def __init__(self, config):
@@ -754,13 +753,11 @@ class BertForPreTraining(BertPreTrainedModel):
Examples::
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForPreTraining.from_pretrained('bert-base-uncased')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    prediction_scores, seq_relationship_scores = outputs[:2]
"""
def __init__(self, config):
@@ -824,13 +821,11 @@ class BertForMaskedLM(BertPreTrainedModel):
Examples::
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForMaskedLM.from_pretrained('bert-base-uncased')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, masked_lm_labels=input_ids)
    loss, prediction_scores = outputs[:2]
"""
def __init__(self, config):
@@ -857,7 +852,7 @@ class BertForMaskedLM(BertPreTrainedModel):
sequence_output = outputs[0]
prediction_scores = self.cls(sequence_output)
outputs = (prediction_scores,) + outputs[2:]  # Add hidden states and attention if they are here
if masked_lm_labels is not None:
    loss_fct = CrossEntropyLoss(ignore_index=-1)
    masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
@@ -891,13 +886,11 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
Examples::
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    seq_relationship_scores = outputs[0]
"""
def __init__(self, config):
@@ -951,14 +944,12 @@ class BertForSequenceClassification(BertPreTrainedModel):
Examples::
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=labels)
    loss, logits = outputs[:2]
"""
def __init__(self, config):
@@ -1057,15 +1048,13 @@ class BertForMultipleChoice(BertPreTrainedModel):
Examples::
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
    choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
    input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
    labels = torch.tensor(1).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=labels)
    loss, classification_scores = outputs[:2]
"""
def __init__(self, config):
@@ -1127,14 +1116,12 @@ class BertForTokenClassification(BertPreTrainedModel):
Examples::
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForTokenClassification.from_pretrained('bert-base-uncased')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=labels)
    loss, scores = outputs[:2]
"""
def __init__(self, config):
@@ -1203,15 +1190,13 @@ class BertForQuestionAnswering(BertPreTrainedModel):
Examples::
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    start_positions = torch.tensor([1])
    end_positions = torch.tensor([3])
    outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
    loss, start_scores, end_scores = outputs[:3]  # three values are unpacked, so slice three elements
"""
def __init__(self, config):
...
@@ -137,7 +137,7 @@ class GPT2Config(PretrainedConfig):
initializer_range=0.02,
num_labels=1,
summary_type='cls_index',
summary_use_proj=True,
summary_activation=None,
summary_proj_to_labels=True,
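The renamed `summary_type='cls_index'` is consumed by `SequenceSummary` (seen among the modeling_auto.py imports above), which pools one hidden state per sequence at the position given by `cls_index`. A rough standalone sketch of that gather step, with illustrative tensor sizes:

import torch

hidden_states = torch.randn(2, 5, 8)  # (batch, seq_len, hidden), illustrative sizes
cls_index = torch.tensor([4, 2])      # summary-token position for each sequence
idx = cls_index[:, None, None].expand(-1, 1, hidden_states.size(-1))
summary = hidden_states.gather(1, idx).squeeze(1)  # (batch, hidden): one pooled vector per sequence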
@@ -433,12 +433,11 @@ class GPT2Model(GPT2PreTrainedModel):
Examples::
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2Model.from_pretrained('gpt2')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
"""
def __init__(self, config):
@@ -567,12 +566,11 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
Examples::
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
    outputs = model(input_ids, labels=input_ids)
    loss, logits = outputs[:2]
"""
def __init__(self, config):
@@ -683,14 +681,13 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
Examples::
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2DoubleHeadsModel.from_pretrained('gpt2')
    choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]  # Assume you've added [CLS] to the vocabulary
    input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
    mc_token_ids = torch.tensor([input_ids.size(-1) - 1, input_ids.size(-1) - 1]).unsqueeze(0)  # position of the [CLS] token (gather does not accept -1)
    outputs = model(input_ids, mc_token_ids)
    lm_prediction_scores, mc_prediction_scores = outputs[:2]
"""
def __init__(self, config):
...