Unverified Commit 0a2fecdf authored by Thomas Wolf, committed by GitHub

Merge branch 'master' into master

parents 39eb31e1 e0caab0c
@@ -205,7 +205,7 @@ def main():
     param_optimizer = list(model.named_parameters())
     no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
     optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
     ]
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
...
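The hunk above only threads the weight-decay value through the script's --weight_decay flag instead of hard-coding 0.01. A minimal self-contained sketch of the same grouping pattern, assuming a toy model (the torch.nn.Linear stands in for a real transformer, and torch.optim.AdamW is used in place of the library's own AdamW):

    import torch

    # Biases and LayerNorm parameters are conventionally excluded from weight decay.
    model = torch.nn.Linear(4, 2)  # stand-in for a transformer
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    weight_decay = 0.01  # would come from args.weight_decay in the script
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': weight_decay},
        {'params': [p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]
    optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=5e-5, eps=1e-8)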
@@ -81,7 +81,7 @@ class ExamplesTests(unittest.TestCase):
             "--do_train",
             "--do_eval",
             "--version_2_with_negative",
-            "--learning_rate=1e-4",
+            "--learning_rate=2e-4",
             "--per_gpu_train_batch_size=2",
             "--per_gpu_eval_batch_size=1",
             "--overwrite_output_dir",
...
@@ -390,10 +390,16 @@ class WnliProcessor(DataProcessor):
 def convert_examples_to_features(examples, label_list, max_seq_length,
                                  tokenizer, output_mode,
-                                 cls_token_at_end=False, pad_on_left=False,
-                                 cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
-                                 sequence_a_segment_id=0, sequence_b_segment_id=1,
-                                 cls_token_segment_id=1, pad_token_segment_id=0,
+                                 cls_token_at_end=False,
+                                 cls_token='[CLS]',
+                                 cls_token_segment_id=1,
+                                 sep_token='[SEP]',
+                                 sep_token_extra=False,
+                                 pad_on_left=False,
+                                 pad_token=0,
+                                 pad_token_segment_id=0,
+                                 sequence_a_segment_id=0,
+                                 sequence_b_segment_id=1,
                                  mask_padding_with_zero=True):
     """ Loads a data file into a list of `InputBatch`s
         `cls_token_at_end` define the location of the CLS token:
@@ -416,12 +422,14 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
             tokens_b = tokenizer.tokenize(example.text_b)
             # Modifies `tokens_a` and `tokens_b` in place so that the total
             # length is less than the specified length.
-            # Account for [CLS], [SEP], [SEP] with "- 3"
-            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
+            # Account for [CLS], [SEP], [SEP] with "- 3". " -4" for RoBERTa.
+            special_tokens_count = 4 if sep_token_extra else 3
+            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
         else:
-            # Account for [CLS] and [SEP] with "- 2"
-            if len(tokens_a) > max_seq_length - 2:
-                tokens_a = tokens_a[:(max_seq_length - 2)]
+            # Account for [CLS] and [SEP] with "- 2" and with "- 3" for RoBERTa.
+            special_tokens_count = 3 if sep_token_extra else 2
+            if len(tokens_a) > max_seq_length - special_tokens_count:
+                tokens_a = tokens_a[:(max_seq_length - special_tokens_count)]
         # The convention in BERT is:
         # (a) For sequence pairs:
@@ -442,6 +450,9 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
         # used as as the "sentence vector". Note that this only makes sense because
         # the entire model is fine-tuned.
         tokens = tokens_a + [sep_token]
+        if sep_token_extra:
+            # roberta uses an extra separator b/w pairs of sentences
+            tokens += [sep_token]
         segment_ids = [sequence_a_segment_id] * len(tokens)
         if tokens_b:
...
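The new sep_token_extra flag only changes the special-token budget: BERT-style pairs are laid out as [CLS] A [SEP] B [SEP] (3 special tokens), while RoBERTa inserts a second separator between the segments (4). A small illustrative sketch of just that layout logic (the helper name and toy tokens are hypothetical, not part of the library):

    def layout_pair(tokens_a, tokens_b, cls_token='[CLS]', sep_token='[SEP]',
                    sep_token_extra=False):
        # [CLS] A [SEP] B [SEP], with one extra separator between A and B
        # when sep_token_extra is set (the RoBERTa convention)
        tokens = tokens_a + [sep_token]
        if sep_token_extra:
            tokens += [sep_token]
        return [cls_token] + tokens + tokens_b + [sep_token]

    # 3 vs. 4 special tokens, matching special_tokens_count above
    assert len(layout_pair(['a'], ['b'])) == 2 + 3
    assert len(layout_pair(['a'], ['b'], sep_token_extra=True)) == 2 + 4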
@@ -37,7 +37,7 @@ bert_docstring = """
                     checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models
                    will be cached.
-        state_dict: an optional state dictionnary
+        state_dict: an optional state dictionary
                     (collections.OrderedDict object) to use instead of Google
                     pre-trained models
         *inputs, **kwargs: additional input for the specific Bert class
@@ -84,12 +84,12 @@ def bertTokenizer(*args, **kwargs):
                 Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
     Example:
-        >>> import torch
-        >>> sentence = 'Hello, World!'
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
-        >>> toks = tokenizer.tokenize(sentence)
+        import torch
+        sentence = 'Hello, World!'
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        toks = tokenizer.tokenize(sentence)
         ['Hello', '##,', 'World', '##!']
-        >>> ids = tokenizer.convert_tokens_to_ids(toks)
+        ids = tokenizer.convert_tokens_to_ids(toks)
         [8667, 28136, 1291, 28125]
     """
     tokenizer = BertTokenizer.from_pretrained(*args, **kwargs)
@@ -105,20 +105,20 @@ def bertModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             encoded_layers, _ = model(tokens_tensor, segments_tensors)
     """
     model = BertModel.from_pretrained(*args, **kwargs)
@@ -134,20 +134,20 @@ def bertForNextSentencePrediction(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForNextSentencePrediction
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
+        model.eval()
         # Predict the next sentence classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
             next_sent_classif_logits = model(tokens_tensor, segments_tensors)
     """
     model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs)
@@ -164,17 +164,17 @@ def bertForPreTraining(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForPreTraining
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
-        >>> masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
+        masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
     """
     model = BertForPreTraining.from_pretrained(*args, **kwargs)
     return model
@@ -188,25 +188,25 @@ def bertForMaskedLM(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> masked_index = 8
-        >>> tokenized_text[masked_index] = '[MASK]'
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        masked_index = 8
+        tokenized_text[masked_index] = '[MASK]'
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForMaskedLM
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
+        model.eval()
         # Predict all tokens
-        >>> with torch.no_grad():
+        with torch.no_grad():
             predictions = model(tokens_tensor, segments_tensors)
-        >>> predicted_index = torch.argmax(predictions[0, masked_index]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        predicted_index = torch.argmax(predictions[0, masked_index]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
         'henson'
     """
     model = BertForMaskedLM.from_pretrained(*args, **kwargs)
@@ -230,24 +230,24 @@ def bertForSequenceClassification(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForSequenceClassification
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
+        model.eval()
         # Predict the sequence classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
             seq_classif_logits = model(tokens_tensor, segments_tensors)
         # Or get the sequence classification loss
-        >>> labels = torch.tensor([1])
-        >>> seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
+        labels = torch.tensor([1])
+        seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
     """
     model = BertForSequenceClassification.from_pretrained(*args, **kwargs)
     return model
@@ -265,24 +265,24 @@ def bertForMultipleChoice(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
-        >>> segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
+        segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
         # Load bertForMultipleChoice
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
+        model.eval()
         # Predict the multiple choice logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
             multiple_choice_logits = model(tokens_tensor, segments_tensors)
         # Or get the multiple choice loss
-        >>> labels = torch.tensor([1])
-        >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
+        labels = torch.tensor([1])
+        multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
     """
     model = BertForMultipleChoice.from_pretrained(*args, **kwargs)
     return model
@@ -298,25 +298,25 @@ def bertForQuestionAnswering(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForQuestionAnswering
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
+        model.eval()
         # Predict the start and end positions logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
             start_logits, end_logits = model(tokens_tensor, segments_tensors)
         # Or get the total loss which is the sum of the CrossEntropy loss for the start and end token positions
-        >>> start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
+        start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
         # set model.train() before if training this loss
-        >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
+        multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
     """
     model = BertForQuestionAnswering.from_pretrained(*args, **kwargs)
     return model
@@ -337,24 +337,24 @@ def bertForTokenClassification(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForTokenClassification
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
+        model.eval()
         # Predict the token classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
             classif_logits = model(tokens_tensor, segments_tensors)
         # Or get the token classification loss
-        >>> labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
-        >>> classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
+        labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
+        classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
     """
     model = BertForTokenClassification.from_pretrained(*args, **kwargs)
     return model
@@ -52,11 +52,11 @@ def gpt2Tokenizer(*args, **kwargs):
             Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
     return tokenizer
@@ -71,24 +71,24 @@ def gpt2Model(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load gpt2Model
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
         # past can be used to reuse precomputed hidden state in a subsequent predictions
-        >>> with torch.no_grad():
+        with torch.no_grad():
             hidden_states_1, past = model(tokens_tensor_1)
             hidden_states_2, past = model(tokens_tensor_2, past=past)
     """
@@ -104,31 +104,31 @@ def gpt2LMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load gpt2LMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
         # past can be used to reuse precomputed hidden state in a subsequent predictions
-        >>> with torch.no_grad():
+        with torch.no_grad():
             predictions_1, past = model(tokens_tensor_1)
             predictions_2, past = model(tokens_tensor_2, past=past)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
     return model
@@ -143,25 +143,25 @@ def gpt2DoubleHeadsModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-        >>> tokenized_text1 = tokenizer.tokenize(text1)
-        >>> tokenized_text2 = tokenizer.tokenize(text2)
-        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+        tokenized_text1 = tokenizer.tokenize(text1)
+        tokenized_text2 = tokenizer.tokenize(text2)
+        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
         # Load gpt2DoubleHeadsModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids)
     """
     model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs)
...
@@ -40,7 +40,7 @@ gpt_docstring = """
         . a series of NumPy files containing OpenAI TensorFlow trained weights
     from_tf: should we load the weights from a locally saved TensorFlow checkpoint
     cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-    state_dict: an optional state dictionnary (collections.OrderedDict object)
+    state_dict: an optional state dictionary (collections.OrderedDict object)
                 to use instead of pre-trained models
     *inputs, **kwargs: additional input for the specific OpenAI-GPT class
 """
@@ -76,12 +76,12 @@ def openAIGPTTokenizer(*args, **kwargs):
             Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
         [763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483]
     """
     tokenizer = OpenAIGPTTokenizer.from_pretrained(*args, **kwargs)
@@ -97,21 +97,21 @@ def openAIGPTModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        tokens_tensor = torch.tensor([indexed_tokens])
         # Load openAIGPTModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             hidden_states = model(tokens_tensor)
     """
     model = OpenAIGPTModel.from_pretrained(*args, **kwargs)
@@ -126,26 +126,26 @@ def openAIGPTLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        tokens_tensor = torch.tensor([indexed_tokens])
         # Load openAIGPTLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             predictions = model(tokens_tensor)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions[0, -1, :]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        predicted_index = torch.argmax(predictions[0, -1, :]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
         '.</w>'
     """
     model = OpenAIGPTLMHeadModel.from_pretrained(*args, **kwargs)
@@ -161,25 +161,25 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-        >>> tokenized_text1 = tokenizer.tokenize(text1)
-        >>> tokenized_text2 = tokenizer.tokenize(text2)
-        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+        tokenized_text1 = tokenizer.tokenize(text1)
+        tokenized_text2 = tokenizer.tokenize(text2)
+        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
         # Load openAIGPTDoubleHeadsModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids)
     """
     model = OpenAIGPTDoubleHeadsModel.from_pretrained(*args, **kwargs)
...
@@ -23,7 +23,7 @@ transformer_xl_docstring = """
         . `model.chkpt` a TensorFlow checkpoint
     from_tf: should we load the weights from a locally saved TensorFlow checkpoint
     cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-    state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
+    state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
     *inputs, **kwargs: additional input for the specific TransformerXL class
 """
@@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs):
         * transfo-xl-wt103
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
-        >>> text = "Who was Jim Henson ?"
-        >>> tokenized_text = tokenizer.tokenize(tokenized_text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        text = "Who was Jim Henson ?"
+        tokenized_text = tokenizer.tokenize(tokenized_text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
     """
     tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer
@@ -63,26 +63,26 @@ def transformerXLModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
-        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
-        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
-        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        tokenized_text_1 = tokenizer.tokenize(text_1)
+        tokenized_text_2 = tokenizer.tokenize(text_2)
+        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load transformerXLModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
+        model.eval()
         # Predict hidden states features for each layer
         # We can re-use the memory cells in a subsequent call to attend a longer context
-        >>> with torch.no_grad():
+        with torch.no_grad():
             hidden_states_1, mems_1 = model(tokens_tensor_1)
             hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
     """
@@ -98,33 +98,33 @@ def transformerXLLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
-        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
-        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
-        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        tokenized_text_1 = tokenizer.tokenize(text_1)
+        tokenized_text_2 = tokenizer.tokenize(text_2)
+        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load transformerXLLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
+        model.eval()
         # Predict hidden states features for each layer
         # We can re-use the memory cells in a subsequent call to attend a longer context
-        >>> with torch.no_grad():
+        with torch.no_grad():
             predictions_1, mems_1 = model(tokens_tensor_1)
             predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
-        >>> assert predicted_token == 'who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        assert predicted_token == 'who'
     """
     model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
     return model
@@ -17,16 +17,16 @@ xlm_start_docstring = """
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 """
 # A lot of models share the same param doc. Use a decorator
@@ -76,11 +76,11 @@ def xlmTokenizer(*args, **kwargs):
             Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = XLMTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer
@@ -91,11 +91,11 @@ def xlmTokenizer(*args, **kwargs):
 def xlmModel(*args, **kwargs):
     """
         # Load xlmModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             hidden_states_1, mems = model(tokens_tensor_1)
             hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """
@@ -108,26 +108,26 @@ def xlmModel(*args, **kwargs):
 def xlmLMHeadModel(*args, **kwargs):
     """
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             predictions_1, mems = model(tokens_tensor_1)
             predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = XLMWithLMHeadModel.from_pretrained(*args, **kwargs)
     return model
@@ -142,25 +142,25 @@ def xlmLMHeadModel(*args, **kwargs):
 #     Example:
 #         # Load the tokenizer
-#         >>> import torch
-#         >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
+#         import torch
+#         tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
 #         # Prepare tokenized input
-#         >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-#         >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-#         >>> tokenized_text1 = tokenizer.tokenize(text1)
-#         >>> tokenized_text2 = tokenizer.tokenize(text2)
-#         >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-#         >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-#         >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-#         >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+#         text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+#         text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+#         tokenized_text1 = tokenizer.tokenize(text1)
+#         tokenized_text2 = tokenizer.tokenize(text2)
+#         indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+#         indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+#         tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+#         mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 #         # Load xlnetForSequenceClassification
-#         >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
-#         >>> model.eval()
+#         model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
+#         model.eval()
 #         # Predict sequence classes logits
-#         >>> with torch.no_grad():
+#         with torch.no_grad():
 #             lm_logits, mems = model(tokens_tensor)
 #     """
 #     model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
...
@@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs):
             Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = XLNetTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer
@@ -72,23 +72,23 @@ def xlnetModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             hidden_states_1, mems = model(tokens_tensor_1)
             hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """
@@ -106,30 +106,30 @@ def xlnetLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             predictions_1, mems = model(tokens_tensor_1)
             predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = XLNetLMHeadModel.from_pretrained(*args, **kwargs)
     return model
...@@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs): ...@@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs):
# Example: # Example:
# # Load the tokenizer # # Load the tokenizer
# >>> import torch # import torch
# >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased') # tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
# # Prepare tokenized input # # Prepare tokenized input
# >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer" # text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
# >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man" # text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
# >>> tokenized_text1 = tokenizer.tokenize(text1) # tokenized_text1 = tokenizer.tokenize(text1)
# >>> tokenized_text2 = tokenizer.tokenize(text2) # tokenized_text2 = tokenizer.tokenize(text2)
# >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1) # indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
# >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2) # indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
# >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]]) # tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
# >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]]) # mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# # Load xlnetForSequenceClassification # # Load xlnetForSequenceClassification
# >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased') # model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
# >>> model.eval() # model.eval()
# # Predict sequence classes logits # # Predict sequence classes logits
# >>> with torch.no_grad(): # with torch.no_grad():
# lm_logits, mems = model(tokens_tensor) # lm_logits, mems = model(tokens_tensor)
# """ # """
# model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs) # model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
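If the commented-out example above is ever revived, note that `indexed_tokens1` and `indexed_tokens2` will usually differ in length, so they cannot be stacked directly into one tensor; a padding step is needed first (a sketch, with pad id 0 as an assumption):

import torch

# Pad ragged token-id lists to a common length before batching.
def pad_batch(sequences, pad_id=0):  # pad_id=0 is an assumption
    max_len = max(len(s) for s in sequences)
    return torch.tensor([s + [pad_id] * (max_len - len(s)) for s in sequences])

# tokens_tensor = pad_batch([indexed_tokens1, indexed_tokens2])  # shape (2, max_len)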
......
__version__ = "1.0.0" __version__ = "1.1.0"
from .tokenization_auto import AutoTokenizer
from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
from .tokenization_openai import OpenAIGPTTokenizer from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus) from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
from .tokenization_gpt2 import GPT2Tokenizer from .tokenization_gpt2 import GPT2Tokenizer
from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
from .tokenization_xlm import XLMTokenizer from .tokenization_xlm import XLMTokenizer
from .tokenization_utils import (PreTrainedTokenizer, clean_up_tokenization) from .tokenization_roberta import RobertaTokenizer
from .tokenization_distilbert import DistilBertTokenizer
from .modeling_bert import (BertConfig, BertModel, BertForPreTraining, from .tokenization_utils import (PreTrainedTokenizer)
BertForMaskedLM, BertForNextSentencePrediction,
BertForSequenceClassification, BertForMultipleChoice, from .modeling_auto import (AutoConfig, AutoModel)
BertForTokenClassification, BertForQuestionAnswering,
load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP, from .modeling_bert import (BertConfig, BertPreTrainedModel, BertModel, BertForPreTraining,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP) BertForMaskedLM, BertForNextSentencePrediction,
from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTModel, BertForSequenceClassification, BertForMultipleChoice,
BertForTokenClassification, BertForQuestionAnswering,
load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)
from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTPreTrainedModel, OpenAIGPTModel,
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel,
load_tf_weights_in_openai_gpt, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, load_tf_weights_in_openai_gpt, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP) OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel, from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLPreTrainedModel, TransfoXLModel, TransfoXLLMHeadModel,
load_tf_weights_in_transfo_xl, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, load_tf_weights_in_transfo_xl, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP) TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_gpt2 import (GPT2Config, GPT2Model, from .modeling_gpt2 import (GPT2Config, GPT2PreTrainedModel, GPT2Model,
GPT2LMHeadModel, GPT2DoubleHeadsModel, GPT2LMHeadModel, GPT2DoubleHeadsModel,
load_tf_weights_in_gpt2, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, load_tf_weights_in_gpt2, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP) GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
...@@ -29,14 +35,19 @@ from .modeling_xlnet import (XLNetConfig, ...@@ -29,14 +35,19 @@ from .modeling_xlnet import (XLNetConfig,
XLNetForSequenceClassification, XLNetForQuestionAnswering, XLNetForSequenceClassification, XLNetForQuestionAnswering,
load_tf_weights_in_xlnet, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, load_tf_weights_in_xlnet, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP) XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_xlm import (XLMConfig, XLMModel, from .modeling_xlm import (XLMConfig, XLMPreTrainedModel, XLMModel,
XLMWithLMHeadModel, XLMForSequenceClassification, XLMWithLMHeadModel, XLMForSequenceClassification,
XLMForQuestionAnswering, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMForQuestionAnswering, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP) XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_roberta import (RobertaConfig, RobertaForMaskedLM, RobertaModel, RobertaForSequenceClassification,
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_distilbert import (DistilBertConfig, DistilBertForMaskedLM, DistilBertModel,
DistilBertForSequenceClassification, DistilBertForQuestionAnswering,
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME, from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
PretrainedConfig, PreTrainedModel, prune_layer, Conv1D) PretrainedConfig, PreTrainedModel, prune_layer, Conv1D)
from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule, WarmupCosineSchedule, from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule, WarmupCosineSchedule,
WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule) WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
from .file_utils import (PYTORCH_PRETRAINED_BERT_CACHE, cached_path) from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
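The Auto* classes newly exported above resolve the concrete tokenizer/model class from the checkpoint name alone; a minimal sketch of the 1.1.0 usage (checkpoint name is illustrative):

from pytorch_transformers import AutoTokenizer, AutoModel

# Both calls infer the concrete class (here BertTokenizer / BertModel)
# from the pretrained checkpoint identifier.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')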
...@@ -35,7 +35,7 @@ def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, p ...@@ -35,7 +35,7 @@ def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, p
if gpt2_config_file == "": if gpt2_config_file == "":
config = GPT2Config() config = GPT2Config()
else: else:
config = GPT2Config(gpt2_config_file) config = GPT2Config.from_json_file(gpt2_config_file)
model = GPT2Model(config) model = GPT2Model(config)
# Load weights from numpy # Load weights from numpy
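The `from_json_file` change above (applied to the GPT-2, OpenAI GPT, and Transfo-XL converters alike) makes config loading explicit instead of routing a file path through the constructor's first positional argument; a minimal sketch with a hypothetical path:

from pytorch_transformers import GPT2Config

# Parse a JSON config file explicitly.
config = GPT2Config.from_json_file("gpt2_config.json")  # hypothetical path
print(config)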
...@@ -58,7 +58,7 @@ if __name__ == "__main__": ...@@ -58,7 +58,7 @@ if __name__ == "__main__":
default = None, default = None,
type = str, type = str,
required = True, required = True,
help = "Path the TensorFlow checkpoint path.") help = "Path to the TensorFlow checkpoint path.")
parser.add_argument("--pytorch_dump_folder_path", parser.add_argument("--pytorch_dump_folder_path",
default = None, default = None,
type = str, type = str,
......
...@@ -35,7 +35,7 @@ def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_c ...@@ -35,7 +35,7 @@ def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_c
if openai_config_file == "": if openai_config_file == "":
config = OpenAIGPTConfig() config = OpenAIGPTConfig()
else: else:
config = OpenAIGPTConfig(openai_config_file) config = OpenAIGPTConfig.from_json_file(openai_config_file)
model = OpenAIGPTModel(config) model = OpenAIGPTModel(config)
# Load weights from numpy # Load weights from numpy
...@@ -58,7 +58,7 @@ if __name__ == "__main__": ...@@ -58,7 +58,7 @@ if __name__ == "__main__":
default = None, default = None,
type = str, type = str,
required = True, required = True,
help = "Path the TensorFlow checkpoint path.") help = "Path to the TensorFlow checkpoint path.")
parser.add_argument("--pytorch_dump_folder_path", parser.add_argument("--pytorch_dump_folder_path",
default = None, default = None,
type = str, type = str,
......
...@@ -47,7 +47,7 @@ if __name__ == "__main__": ...@@ -47,7 +47,7 @@ if __name__ == "__main__":
default = None, default = None,
type = str, type = str,
required = True, required = True,
help = "Path the TensorFlow checkpoint path.") help = "Path to the TensorFlow checkpoint path.")
parser.add_argument("--bert_config_file", parser.add_argument("--bert_config_file",
default = None, default = None,
type = str, type = str,
......
...@@ -24,11 +24,10 @@ from io import open ...@@ -24,11 +24,10 @@ from io import open
import torch import torch
import pytorch_transformers.tokenization_transfo_xl as data_utils import pytorch_transformers.tokenization_transfo_xl as data_utils
from pytorch_transformers.modeling_transfo_xl import (CONFIG_NAME,
WEIGHTS_NAME, from pytorch_transformers import CONFIG_NAME, WEIGHTS_NAME
TransfoXLConfig, from pytorch_transformers.modeling_transfo_xl import (TransfoXLConfig, TransfoXLLMHeadModel,
TransfoXLLMHeadModel, load_tf_weights_in_transfo_xl)
load_tf_weights_in_transfo_xl)
from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME, VOCAB_FILES_NAMES) from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME, VOCAB_FILES_NAMES)
if sys.version_info[0] == 2: if sys.version_info[0] == 2:
...@@ -76,7 +75,7 @@ def convert_transfo_xl_checkpoint_to_pytorch(tf_checkpoint_path, ...@@ -76,7 +75,7 @@ def convert_transfo_xl_checkpoint_to_pytorch(tf_checkpoint_path,
if transfo_xl_config_file == "": if transfo_xl_config_file == "":
config = TransfoXLConfig() config = TransfoXLConfig()
else: else:
config = TransfoXLConfig(transfo_xl_config_file) config = TransfoXLConfig.from_json_file(transfo_xl_config_file)
print("Building PyTorch model from configuration: {}".format(str(config))) print("Building PyTorch model from configuration: {}".format(str(config)))
model = TransfoXLLMHeadModel(config) model = TransfoXLLMHeadModel(config)
......
...@@ -36,7 +36,7 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p ...@@ -36,7 +36,7 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p
model = chkpt['model'] model = chkpt['model']
config = chkpt['params'] config = chkpt['params']
config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.Tensor, numpy.ndarray))) config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))
vocab = chkpt['dico_word2id'] vocab = chkpt['dico_word2id']
vocab = dict((s + '</w>' if s.find('@@') == -1 and i > 13 else s.replace('@@', ''), i) for s, i in vocab.items()) vocab = dict((s + '</w>' if s.find('@@') == -1 and i > 13 else s.replace('@@', ''), i) for s, i in vocab.items())
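The isinstance filter above keeps only plain Python values in the config dict, dropping weight tensors and arrays that belong in the state dict instead; a self-contained sketch of the same idiom (values are illustrative):

import numpy
import torch

# Keep only JSON-serializable configuration values.
params = {'n_layers': 12, 'emb_weight': torch.zeros(2, 3), 'ids': numpy.arange(4)}
config = dict((n, v) for n, v in params.items()
              if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))
assert config == {'n_layers': 12}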
......