Commit 0a2fecdf (unverified)
Authored Aug 30, 2019 by Thomas Wolf; committed by GitHub, Aug 30, 2019

    Merge branch 'master' into master

Parents: 39eb31e1, e0caab0c
Changes: 98 files total; showing 20 changed files with 841 additions and 382 deletions (+841 −382).

examples/single_model_scripts/run_openai_gpt.py  +1 −1
examples/test_examples.py  +1 −1
examples/utils_glue.py  +20 −9
hubconfs/bert_hubconf.py  +104 −104
hubconfs/gpt2_hubconf.py  +42 −42
hubconfs/gpt_hubconf.py  +39 −39
hubconfs/transformer_xl_hubconf.py  +35 −35
hubconfs/xlm_hubconf.py  +40 −40
hubconfs/xlnet_hubconf.1.py  +42 −42
pytorch_transformers/__init__.py  +24 −13
pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py  +2 −2
pytorch_transformers/convert_openai_checkpoint_to_pytorch.py  +2 −2
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py  +21 −21
pytorch_transformers/convert_roberta_checkpoint_to_pytorch.py  +181 −0
pytorch_transformers/convert_tf_checkpoint_to_pytorch.py  +1 −1
pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py  +5 −6
pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py  +1 −1
pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py  +1 −1
pytorch_transformers/file_utils.py  +29 −22
pytorch_transformers/modeling_auto.py  +250 −0
examples/single_model_scripts/run_openai_gpt.py

@@ -205,7 +205,7 @@ def main():
     param_optimizer = list(model.named_parameters())
     no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
     optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
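This hunk replaces the hard-coded weight decay of 0.01 with the configurable args.weight_decay, while keeping bias and LayerNorm parameters in a separate no-decay group. A minimal standalone sketch of the same grouping pattern (not part of this commit; the model and hyperparameter values are placeholders):

    import torch
    from pytorch_transformers import AdamW

    model = torch.nn.Linear(4, 2)  # placeholder model
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    grouped_parameters = [
        # weight decay only for parameters whose names match none of the no-decay patterns
        {'params': [p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
    ]
    optimizer = AdamW(grouped_parameters, lr=1e-4, eps=1e-8)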
examples/test_examples.py

@@ -81,7 +81,7 @@ class ExamplesTests(unittest.TestCase):
             "--do_train",
             "--do_eval",
             "--version_2_with_negative",
-            "--learning_rate=1e-4",
+            "--learning_rate=2e-4",
             "--per_gpu_train_batch_size=2",
             "--per_gpu_eval_batch_size=1",
             "--overwrite_output_dir",
examples/utils_glue.py

@@ -390,10 +390,16 @@ class WnliProcessor(DataProcessor):
 def convert_examples_to_features(examples, label_list, max_seq_length,
                                  tokenizer, output_mode,
-                                 cls_token_at_end=False, pad_on_left=False,
-                                 cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
-                                 sequence_a_segment_id=0, sequence_b_segment_id=1,
-                                 cls_token_segment_id=1, pad_token_segment_id=0,
+                                 cls_token_at_end=False,
+                                 cls_token='[CLS]',
+                                 cls_token_segment_id=1,
+                                 sep_token='[SEP]',
+                                 sep_token_extra=False,
+                                 pad_on_left=False,
+                                 pad_token=0,
+                                 pad_token_segment_id=0,
+                                 sequence_a_segment_id=0,
+                                 sequence_b_segment_id=1,
                                  mask_padding_with_zero=True):
     """ Loads a data file into a list of `InputBatch`s
         `cls_token_at_end` define the location of the CLS token:

@@ -416,12 +422,14 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
             tokens_b = tokenizer.tokenize(example.text_b)
             # Modifies `tokens_a` and `tokens_b` in place so that the total
             # length is less than the specified length.
-            # Account for [CLS], [SEP], [SEP] with "- 3"
-            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
+            # Account for [CLS], [SEP], [SEP] with "- 3". " -4" for RoBERTa.
+            special_tokens_count = 4 if sep_token_extra else 3
+            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
         else:
-            # Account for [CLS] and [SEP] with "- 2"
-            if len(tokens_a) > max_seq_length - 2:
-                tokens_a = tokens_a[:(max_seq_length - 2)]
+            # Account for [CLS] and [SEP] with "- 2" and with "- 3" for RoBERTa.
+            special_tokens_count = 3 if sep_token_extra else 2
+            if len(tokens_a) > max_seq_length - special_tokens_count:
+                tokens_a = tokens_a[:(max_seq_length - special_tokens_count)]
         # The convention in BERT is:
         # (a) For sequence pairs:

@@ -442,6 +450,9 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
         # used as as the "sentence vector". Note that this only makes sense because
         # the entire model is fine-tuned.
         tokens = tokens_a + [sep_token]
+        if sep_token_extra:
+            # roberta uses an extra separator b/w pairs of sentences
+            tokens += [sep_token]
         segment_ids = [sequence_a_segment_id] * len(tokens)

         if tokens_b:
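The new sep_token_extra flag reserves room for the extra separator RoBERTa places between paired sentences, so the truncation budget grows from 3 to 4 special tokens for pairs (and from 2 to 3 for single sentences). A small sketch of just that arithmetic, illustrative only and not part of the diff:

    def special_tokens_count(is_pair, sep_token_extra):
        # mirrors the branches above: [CLS]/[SEP] bookkeeping, plus one
        # extra separator for RoBERTa-style inputs
        if is_pair:
            return 4 if sep_token_extra else 3
        return 3 if sep_token_extra else 2

    max_seq_length = 128
    assert max_seq_length - special_tokens_count(True, False) == 125  # BERT pair
    assert max_seq_length - special_tokens_count(True, True) == 124   # RoBERTa pair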
hubconfs/bert_hubconf.py

@@ -37,7 +37,7 @@ bert_docstring = """
                    checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models
                    will be cached.
-        state_dict: an optional state dictionnary
+        state_dict: an optional state dictionary
                    (collections.OrderedDict object) to use instead of Google
                    pre-trained models
         *inputs, **kwargs: additional input for the specific Bert class

@@ -84,12 +84,12 @@ def bertTokenizer(*args, **kwargs):
                 Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
     Example:
-        >>> import torch
-        >>> sentence = 'Hello, World!'
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
-        >>> toks = tokenizer.tokenize(sentence)
+        import torch
+        sentence = 'Hello, World!'
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        toks = tokenizer.tokenize(sentence)
         ['Hello', '##,', 'World', '##!']
-        >>> ids = tokenizer.convert_tokens_to_ids(toks)
+        ids = tokenizer.convert_tokens_to_ids(toks)
         [8667, 28136, 1291, 28125]
     """
     tokenizer = BertTokenizer.from_pretrained(*args, **kwargs)

@@ -105,20 +105,20 @@ def bertModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 encoded_layers, _ = model(tokens_tensor, segments_tensors)
     """
     model = BertModel.from_pretrained(*args, **kwargs)

@@ -134,20 +134,20 @@ def bertForNextSentencePrediction(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForNextSentencePrediction
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
+        model.eval()
         # Predict the next sentence classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 next_sent_classif_logits = model(tokens_tensor, segments_tensors)
     """
     model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs)

@@ -164,17 +164,17 @@ def bertForPreTraining(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForPreTraining
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
-        >>> masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
+        masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
     """
     model = BertForPreTraining.from_pretrained(*args, **kwargs)
     return model

@@ -188,25 +188,25 @@ def bertForMaskedLM(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> masked_index = 8
-        >>> tokenized_text[masked_index] = '[MASK]'
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        masked_index = 8
+        tokenized_text[masked_index] = '[MASK]'
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForMaskedLM
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
+        model.eval()
         # Predict all tokens
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions = model(tokens_tensor, segments_tensors)
-        >>> predicted_index = torch.argmax(predictions[0, masked_index]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        predicted_index = torch.argmax(predictions[0, masked_index]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
         'henson'
     """
     model = BertForMaskedLM.from_pretrained(*args, **kwargs)

@@ -230,24 +230,24 @@ def bertForSequenceClassification(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForSequenceClassification
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
+        model.eval()
         # Predict the sequence classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 seq_classif_logits = model(tokens_tensor, segments_tensors)
         # Or get the sequence classification loss
-        >>> labels = torch.tensor([1])
-        >>> seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
+        labels = torch.tensor([1])
+        seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
     """
     model = BertForSequenceClassification.from_pretrained(*args, **kwargs)
     return model

@@ -265,24 +265,24 @@ def bertForMultipleChoice(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
-        >>> segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
+        segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
         # Load bertForMultipleChoice
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
+        model.eval()
         # Predict the multiple choice logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 multiple_choice_logits = model(tokens_tensor, segments_tensors)
         # Or get the multiple choice loss
-        >>> labels = torch.tensor([1])
-        >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
+        labels = torch.tensor([1])
+        multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
     """
     model = BertForMultipleChoice.from_pretrained(*args, **kwargs)
    return model

@@ -298,25 +298,25 @@ def bertForQuestionAnswering(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForQuestionAnswering
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
+        model.eval()
         # Predict the start and end positions logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 start_logits, end_logits = model(tokens_tensor, segments_tensors)
         # Or get the total loss which is the sum of the CrossEntropy loss for the start and end token positions
-        >>> start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
+        start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
         # set model.train() before if training this loss
-        >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
+        multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
     """
     model = BertForQuestionAnswering.from_pretrained(*args, **kwargs)
     return model

@@ -337,24 +337,24 @@ def bertForTokenClassification(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForTokenClassification
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
+        model.eval()
         # Predict the token classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 classif_logits = model(tokens_tensor, segments_tensors)
         # Or get the token classification loss
-        >>> labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
-        >>> classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
+        labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
+        classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
     """
     model = BertForTokenClassification.from_pretrained(*args, **kwargs)
    return model
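All of these hubconf entry points share the same load-and-run pattern; the hunks above only strip the ">>>" prompts so the docstrings read as plain code. A single end-to-end sketch assembled from the bertForMaskedLM docstring above (network access and the 2019-era 'huggingface/pytorch-transformers' hub repo are assumed; note that models in this library return tuples, so the logits are taken as the first element):

    import torch

    tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer',
                               'bert-base-cased', do_basic_tokenize=False)
    model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
    model.eval()

    text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
    tokenized_text = tokenizer.tokenize(text)
    masked_index = 8
    tokenized_text[masked_index] = '[MASK]'
    tokens_tensor = torch.tensor([tokenizer.convert_tokens_to_ids(tokenized_text)])
    segments_tensors = torch.tensor([[0] * 8 + [1] * (len(tokenized_text) - 8)])

    with torch.no_grad():
        predictions = model(tokens_tensor, segments_tensors)[0]  # logits are the first tuple element
    predicted_index = torch.argmax(predictions[0, masked_index]).item()
    print(tokenizer.convert_ids_to_tokens([predicted_index])[0])  # 'henson' per the docstring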
hubconfs/gpt2_hubconf.py

@@ -52,11 +52,11 @@ def gpt2Tokenizer(*args, **kwargs):
                 Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
     return tokenizer

@@ -71,24 +71,24 @@ def gpt2Model(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load gpt2Model
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
         # past can be used to reuse precomputed hidden state in a subsequent predictions
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, past = model(tokens_tensor_1)
                 hidden_states_2, past = model(tokens_tensor_2, past=past)
     """

@@ -104,31 +104,31 @@ def gpt2LMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load gpt2LMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
         # past can be used to reuse precomputed hidden state in a subsequent predictions
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, past = model(tokens_tensor_1)
                 predictions_2, past = model(tokens_tensor_2, past=past)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
     return model

@@ -143,25 +143,25 @@ def gpt2DoubleHeadsModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-        >>> tokenized_text1 = tokenizer.tokenize(text1)
-        >>> tokenized_text2 = tokenizer.tokenize(text2)
-        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+        tokenized_text1 = tokenizer.tokenize(text1)
+        tokenized_text2 = tokenizer.tokenize(text2)
+        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
         # Load gpt2DoubleHeadsModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids)
     """
     model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs)
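The gpt2Model and gpt2LMHeadModel docstrings above both demonstrate feeding past back into the next call so previously computed key/value states are reused during incremental decoding. The same idea using the library directly rather than torch.hub (a hedged sketch; weight download is assumed):

    import torch
    from pytorch_transformers import GPT2LMHeadModel, GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    model.eval()

    tokens_1 = torch.tensor([tokenizer.encode("Who was Jim Henson ?")])
    tokens_2 = torch.tensor([tokenizer.encode(" Jim Henson was a puppeteer")])

    with torch.no_grad():
        predictions_1, past = model(tokens_1)             # logits plus cached states
        predictions_2, past = model(tokens_2, past=past)  # only the new tokens are processed

    predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
    print(tokenizer.decode([predicted_index]))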
hubconfs/gpt_hubconf.py

@@ -40,7 +40,7 @@ gpt_docstring = """
         . a series of NumPy files containing OpenAI TensorFlow trained weights
     from_tf: should we load the weights from a locally saved TensorFlow checkpoint
     cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-    state_dict: an optional state dictionnary (collections.OrderedDict object)
+    state_dict: an optional state dictionary (collections.OrderedDict object)
                 to use instead of pre-trained models
     *inputs, **kwargs: additional input for the specific OpenAI-GPT class
 """

@@ -76,12 +76,12 @@ def openAIGPTTokenizer(*args, **kwargs):
                 Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
         [763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483]
     """
     tokenizer = OpenAIGPTTokenizer.from_pretrained(*args, **kwargs)

@@ -97,21 +97,21 @@ def openAIGPTModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        tokens_tensor = torch.tensor([indexed_tokens])
         # Load openAIGPTModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states = model(tokens_tensor)
     """
     model = OpenAIGPTModel.from_pretrained(*args, **kwargs)

@@ -126,26 +126,26 @@ def openAIGPTLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        tokens_tensor = torch.tensor([indexed_tokens])
         # Load openAIGPTLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions = model(tokens_tensor)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions[0, -1, :]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        predicted_index = torch.argmax(predictions[0, -1, :]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
         '.</w>'
     """
     model = OpenAIGPTLMHeadModel.from_pretrained(*args, **kwargs)

@@ -161,25 +161,25 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-        >>> tokenized_text1 = tokenizer.tokenize(text1)
-        >>> tokenized_text2 = tokenizer.tokenize(text2)
-        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+        tokenized_text1 = tokenizer.tokenize(text1)
+        tokenized_text2 = tokenizer.tokenize(text2)
+        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
         # Load openAIGPTDoubleHeadsModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids)
     """
     model = OpenAIGPTDoubleHeadsModel.from_pretrained(*args, **kwargs)
hubconfs/transformer_xl_hubconf.py

@@ -23,7 +23,7 @@ transformer_xl_docstring = """
         . `model.chkpt` a TensorFlow checkpoint
     from_tf: should we load the weights from a locally saved TensorFlow checkpoint
     cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-    state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
+    state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
     *inputs, **kwargs: additional input for the specific TransformerXL class
 """

@@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs):
             * transfo-xl-wt103
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
-        >>> text = "Who was Jim Henson ?"
-        >>> tokenized_text = tokenizer.tokenize(tokenized_text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        text = "Who was Jim Henson ?"
+        tokenized_text = tokenizer.tokenize(tokenized_text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
     """
     tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer

@@ -63,26 +63,26 @@ def transformerXLModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
-        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
-        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
-        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        tokenized_text_1 = tokenizer.tokenize(text_1)
+        tokenized_text_2 = tokenizer.tokenize(text_2)
+        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load transformerXLModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
+        model.eval()
         # Predict hidden states features for each layer
         # We can re-use the memory cells in a subsequent call to attend a longer context
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems_1 = model(tokens_tensor_1)
                 hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
     """

@@ -98,33 +98,33 @@ def transformerXLLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
-        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
-        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
-        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        tokenized_text_1 = tokenizer.tokenize(text_1)
+        tokenized_text_2 = tokenizer.tokenize(text_2)
+        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load transformerXLLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
+        model.eval()
         # Predict hidden states features for each layer
         # We can re-use the memory cells in a subsequent call to attend a longer context
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems_1 = model(tokens_tensor_1)
                 predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
-        >>> assert predicted_token == 'who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        assert predicted_token == 'who'
     """
     model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
    return model
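Transformer-XL's mems serve the same purpose as GPT-2's past above: they carry the memory cells across calls so a later segment can attend to a longer context, passed back with the mems keyword. A hedged standalone sketch following the docstring's return pattern (weight download assumed):

    import torch
    from pytorch_transformers import TransfoXLLMHeadModel, TransfoXLTokenizer

    tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
    model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
    model.eval()

    def encode(text):
        # Transformer-XL uses tokenize + convert_tokens_to_ids rather than encode
        return torch.tensor([tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))])

    with torch.no_grad():
        predictions_1, mems_1 = model(encode("Who was Jim Henson ?"))
        # the memory cells let the second segment attend to the first
        predictions_2, mems_2 = model(encode("Jim Henson was a puppeteer"), mems=mems_1)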
hubconfs/xlm_hubconf.py

@@ -17,16 +17,16 @@ xlm_start_docstring = """
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 """
 # A lot of models share the same param doc. Use a decorator

@@ -76,11 +76,11 @@ def xlmTokenizer(*args, **kwargs):
                 Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = XLMTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer

@@ -91,11 +91,11 @@ def xlmTokenizer(*args, **kwargs):
 def xlmModel(*args, **kwargs):
     """
         # Load xlmModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems = model(tokens_tensor_1)
                 hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """

@@ -108,26 +108,26 @@ def xlmModel(*args, **kwargs):
 def xlmLMHeadModel(*args, **kwargs):
     """
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems = model(tokens_tensor_1)
                 predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = XLMWithLMHeadModel.from_pretrained(*args, **kwargs)
     return model

@@ -142,25 +142,25 @@ def xlmLMHeadModel(*args, **kwargs):
 # Example:
 #     # Load the tokenizer
-#     >>> import torch
-#     >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
+#     import torch
+#     tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
 #     # Prepare tokenized input
-#     >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-#     >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-#     >>> tokenized_text1 = tokenizer.tokenize(text1)
-#     >>> tokenized_text2 = tokenizer.tokenize(text2)
-#     >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-#     >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-#     >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-#     >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+#     text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+#     text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+#     tokenized_text1 = tokenizer.tokenize(text1)
+#     tokenized_text2 = tokenizer.tokenize(text2)
+#     indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+#     indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+#     tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+#     mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 #     # Load xlnetForSequenceClassification
-#     >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
-#     >>> model.eval()
+#     model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
+#     model.eval()
 #     # Predict sequence classes logits
-#     >>> with torch.no_grad():
+#     with torch.no_grad():
 #             lm_logits, mems = model(tokens_tensor)
 # """
 # model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
hubconfs/xlnet_hubconf.1.py

@@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs):
                 Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = XLNetTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer

@@ -72,23 +72,23 @@ def xlnetModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems = model(tokens_tensor_1)
                 hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """

@@ -106,30 +106,30 @@ def xlnetLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems = model(tokens_tensor_1)
                 predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = XLNetLMHeadModel.from_pretrained(*args, **kwargs)
     return model

@@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs):
 # Example:
 #     # Load the tokenizer
-#     >>> import torch
-#     >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+#     import torch
+#     tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
 #     # Prepare tokenized input
-#     >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-#     >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-#     >>> tokenized_text1 = tokenizer.tokenize(text1)
-#     >>> tokenized_text2 = tokenizer.tokenize(text2)
-#     >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-#     >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-#     >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-#     >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+#     text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+#     text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+#     tokenized_text1 = tokenizer.tokenize(text1)
+#     tokenized_text2 = tokenizer.tokenize(text2)
+#     indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+#     indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+#     tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+#     mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 #     # Load xlnetForSequenceClassification
-#     >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
-#     >>> model.eval()
+#     model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
+#     model.eval()
 #     # Predict sequence classes logits
-#     >>> with torch.no_grad():
+#     with torch.no_grad():
 #             lm_logits, mems = model(tokens_tensor)
 # """
 # model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
pytorch_transformers/__init__.py

-__version__ = "1.0.0"
+__version__ = "1.1.0"
+from .tokenization_auto import AutoTokenizer
 from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
 from .tokenization_openai import OpenAIGPTTokenizer
 from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
 from .tokenization_gpt2 import GPT2Tokenizer
 from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
 from .tokenization_xlm import XLMTokenizer
-from .tokenization_utils import (PreTrainedTokenizer, clean_up_tokenization)
-from .modeling_bert import (BertConfig, BertModel, BertForPreTraining,
-                            BertForMaskedLM, BertForNextSentencePrediction,
-                            BertForSequenceClassification, BertForMultipleChoice,
-                            BertForTokenClassification, BertForQuestionAnswering,
-                            load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
-                            BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)
-from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTModel,
+from .tokenization_roberta import RobertaTokenizer
+from .tokenization_distilbert import DistilBertTokenizer
+from .tokenization_utils import (PreTrainedTokenizer)
+from .modeling_auto import (AutoConfig, AutoModel)
+from .modeling_bert import (BertConfig, BertPreTrainedModel, BertModel, BertForPreTraining,
+                            BertForMaskedLM, BertForNextSentencePrediction,
+                            BertForSequenceClassification, BertForMultipleChoice,
+                            BertForTokenClassification, BertForQuestionAnswering,
+                            load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+                            BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)
+from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTPreTrainedModel, OpenAIGPTModel,
                               OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel,
                               load_tf_weights_in_openai_gpt, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
                               OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel,
+from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLPreTrainedModel, TransfoXLModel, TransfoXLLMHeadModel,
                                   load_tf_weights_in_transfo_xl, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
                                   TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_gpt2 import (GPT2Config, GPT2Model,
+from .modeling_gpt2 import (GPT2Config, GPT2PreTrainedModel, GPT2Model,
                             GPT2LMHeadModel, GPT2DoubleHeadsModel,
                             load_tf_weights_in_gpt2, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
                             GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)

@@ -29,14 +35,19 @@ from .modeling_xlnet import (XLNetConfig,
                              XLNetForSequenceClassification, XLNetForQuestionAnswering,
                              load_tf_weights_in_xlnet, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
                              XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_xlm import (XLMConfig, XLMModel,
+from .modeling_xlm import (XLMConfig, XLMPreTrainedModel, XLMModel,
                            XLMWithLMHeadModel, XLMForSequenceClassification,
                            XLMForQuestionAnswering, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
                            XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
+from .modeling_roberta import (RobertaConfig, RobertaForMaskedLM, RobertaModel,
+                               RobertaForSequenceClassification,
+                               ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
+                               ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
+from .modeling_distilbert import (DistilBertConfig, DistilBertForMaskedLM, DistilBertModel,
+                                  DistilBertForSequenceClassification,
+                                  DistilBertForQuestionAnswering,
+                                  DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
+                                  DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
 from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
                              PretrainedConfig, PreTrainedModel, prune_layer, Conv1D)
 from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule,
                            WarmupCosineSchedule, WarmupCosineWithHardRestartsSchedule,
                            WarmupLinearSchedule)
-from .file_utils import (PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
+from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
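Besides the version bump to 1.1.0, the package now exposes the Auto classes (AutoConfig, AutoModel, alongside the existing AutoTokenizer) plus the RoBERTa and DistilBERT families. A hedged sketch of the new Auto entry points (weight download assumed; outputs are tuples in this library):

    import torch
    from pytorch_transformers import AutoModel, AutoTokenizer

    # the Auto classes infer the right architecture from the model name or path
    tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
    model = AutoModel.from_pretrained('bert-base-cased')
    model.eval()

    input_ids = torch.tensor([tokenizer.encode("Who was Jim Henson ?")])
    with torch.no_grad():
        last_hidden_state = model(input_ids)[0]  # first tuple element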
pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py

@@ -35,7 +35,7 @@ def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, p
     if gpt2_config_file == "":
         config = GPT2Config()
     else:
-        config = GPT2Config(gpt2_config_file)
+        config = GPT2Config.from_json_file(gpt2_config_file)
     model = GPT2Model(config)

     # Load weights from numpy

@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default = None,
                         type = str,
                         required = True,
-                        help = "Path the TensorFlow checkpoint path.")
+                        help = "Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default = None,
                         type = str,
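The first hunk switches to the explicit JSON loader. A minimal sketch of the corrected pattern (the path is a placeholder; the note about the constructor reflects the 1.x-era vocab_size_or_config_json_file overload and is an assumption about why the explicit call is preferred):

    # from_json_file explicitly loads a JSON config, instead of relying on the
    # constructor's legacy first positional argument accepting a path.
    from pytorch_transformers import GPT2Config

    config = GPT2Config.from_json_file("/path/to/gpt2_config.json")  # placeholder path
    print(config)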
pytorch_transformers/convert_openai_checkpoint_to_pytorch.py

@@ -35,7 +35,7 @@ def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_c
     if openai_config_file == "":
         config = OpenAIGPTConfig()
     else:
-        config = OpenAIGPTConfig(openai_config_file)
+        config = OpenAIGPTConfig.from_json_file(openai_config_file)
     model = OpenAIGPTModel(config)

     # Load weights from numpy

@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default = None,
                         type = str,
                         required = True,
-                        help = "Path the TensorFlow checkpoint path.")
+                        help = "Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default = None,
                         type = str,
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py
View file @ 0a2fecdf
...
@@ -20,7 +20,7 @@ import argparse
 import torch
 import numpy as np
 import tensorflow as tf
-from pytorch_pretrained_bert.modeling import BertModel
+from pytorch_transformers.modeling import BertModel


 def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:str):
...
@@ -41,7 +41,7 @@ def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:s
     N BertForQuestionAnswering
     """

-    tensors_to_transopse = (
+    tensors_to_transpose = (
         "dense.weight",
         "attention.self.query",
         "attention.self.key",
...
@@ -62,34 +62,34 @@ def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:s
     if not os.path.isdir(ckpt_dir):
         os.makedirs(ckpt_dir)

-    session = tf.Session()
     state_dict = model.state_dict()
-    tf_vars = []

     def to_tf_var_name(name:str):
         for patt, repl in iter(var_map):
             name = name.replace(patt, repl)
         return 'bert/{}'.format(name)

-    def assign_tf_var(tensor:np.ndarray, name:str):
-        tmp_var = tf.Variable(initial_value=tensor)
-        tf_var = tf.get_variable(dtype=tmp_var.dtype, shape=tmp_var.shape, name=name)
-        op = tf.assign(ref=tf_var, value=tmp_var)
-        session.run(tf.variables_initializer([tmp_var, tf_var]))
-        session.run(fetches=[op, tf_var])
+    def create_tf_var(tensor:np.ndarray, name:str, session:tf.Session):
+        tf_dtype = tf.dtypes.as_dtype(tensor.dtype)
+        tf_var = tf.get_variable(dtype=tf_dtype, shape=tensor.shape, name=name, initializer=tf.zeros_initializer())
+        session.run(tf.variables_initializer([tf_var]))
+        session.run(tf_var)
         return tf_var

-    for var_name in state_dict:
-        tf_name = to_tf_var_name(var_name)
-        torch_tensor = state_dict[var_name].numpy()
-        if any([x in var_name for x in tensors_to_transopse]):
-            torch_tensor = torch_tensor.T
-        tf_tensor = assign_tf_var(tensor=torch_tensor, name=tf_name)
-        tf_vars.append(tf_tensor)
-        print("{0}{1}initialized".format(tf_name, " " * (60 - len(tf_name))))
-
-    saver = tf.train.Saver(tf_vars)
-    saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt"))
+    tf.reset_default_graph()
+    with tf.Session() as session:
+        for var_name in state_dict:
+            tf_name = to_tf_var_name(var_name)
+            torch_tensor = state_dict[var_name].numpy()
+            if any([x in var_name for x in tensors_to_transpose]):
+                torch_tensor = torch_tensor.T
+            tf_var = create_tf_var(tensor=torch_tensor, name=tf_name, session=session)
+            tf.keras.backend.set_value(tf_var, torch_tensor)
+            tf_weight = session.run(tf_var)
+            print("Successfully created {}: {}".format(tf_name, np.allclose(tf_weight, torch_tensor)))

+        saver = tf.train.Saver(tf.trainable_variables())
+        saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt"))


 def main(raw_args=None):
...
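A minimal standalone sketch of the new export pattern (shapes and the variable name are illustrative; assumes TensorFlow 1.x graph mode, where entering a Session installs it as the default used by the Keras backend): the variable is created zero-initialized, the NumPy payload is copied in with tf.keras.backend.set_value, then read back for verification.

    import numpy as np
    import tensorflow as tf  # assumes TensorFlow 1.x

    tf.reset_default_graph()
    with tf.Session() as session:
        weight = np.random.rand(3, 4).astype(np.float32)  # stand-in for a torch tensor
        tf_var = tf.get_variable(dtype=tf.float32, shape=weight.shape,
                                 name="demo/weight", initializer=tf.zeros_initializer())
        session.run(tf.variables_initializer([tf_var]))
        tf.keras.backend.set_value(tf_var, weight)   # copy the NumPy array into the variable
        print(np.allclose(session.run(tf_var), weight))  # True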
pytorch_transformers/convert_roberta_checkpoint_to_pytorch.py
0 → 100644
View file @ 0a2fecdf
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert RoBERTa checkpoint."""

from __future__ import absolute_import, division, print_function

import argparse
import logging
import numpy as np
import torch

from fairseq.models.roberta import RobertaModel as FairseqRobertaModel
from fairseq.modules import TransformerSentenceEncoderLayer
from pytorch_transformers.modeling_bert import (BertConfig, BertEncoder,
                                                BertIntermediate, BertLayer,
                                                BertModel, BertOutput,
                                                BertSelfAttention,
                                                BertSelfOutput)
from pytorch_transformers.modeling_roberta import (RobertaEmbeddings,
                                                   RobertaForMaskedLM,
                                                   RobertaForSequenceClassification,
                                                   RobertaModel)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

SAMPLE_TEXT = 'Hello world! cécé herlolip'


def convert_roberta_checkpoint_to_pytorch(roberta_checkpoint_path, pytorch_dump_folder_path, classification_head):
    """
    Copy/paste/tweak roberta's weights to our BERT structure.
    """
    roberta = FairseqRobertaModel.from_pretrained(roberta_checkpoint_path)
    roberta.eval()  # disable dropout
    config = BertConfig(
        vocab_size_or_config_json_file=50265,
        hidden_size=roberta.args.encoder_embed_dim,
        num_hidden_layers=roberta.args.encoder_layers,
        num_attention_heads=roberta.args.encoder_attention_heads,
        intermediate_size=roberta.args.encoder_ffn_embed_dim,
        max_position_embeddings=514,
        type_vocab_size=1,
    )
    if classification_head:
        config.num_labels = roberta.args.num_classes
    print("Our BERT config:", config)

    model = RobertaForSequenceClassification(config) if classification_head else RobertaForMaskedLM(config)
    model.eval()

    # Now let's copy all the weights.
    # Embeddings
    roberta_sent_encoder = roberta.model.decoder.sentence_encoder
    model.roberta.embeddings.word_embeddings.weight = roberta_sent_encoder.embed_tokens.weight
    model.roberta.embeddings.position_embeddings.weight = roberta_sent_encoder.embed_positions.weight
    model.roberta.embeddings.token_type_embeddings.weight.data = torch.zeros_like(
        model.roberta.embeddings.token_type_embeddings.weight)  # just zero them out b/c RoBERTa doesn't use them.
    model.roberta.embeddings.LayerNorm.weight = roberta_sent_encoder.emb_layer_norm.weight
    model.roberta.embeddings.LayerNorm.bias = roberta_sent_encoder.emb_layer_norm.bias
    model.roberta.embeddings.LayerNorm.variance_epsilon = roberta_sent_encoder.emb_layer_norm.eps

    for i in range(config.num_hidden_layers):
        # Encoder: start of layer
        layer: BertLayer = model.roberta.encoder.layer[i]
        roberta_layer: TransformerSentenceEncoderLayer = roberta_sent_encoder.layers[i]

        ### self attention
        self_attn: BertSelfAttention = layer.attention.self
        assert(
            roberta_layer.self_attn.in_proj_weight.shape == torch.Size((3 * config.hidden_size, config.hidden_size))
        )
        # we use three distinct linear layers so we split the source layer here.
        self_attn.query.weight.data = roberta_layer.self_attn.in_proj_weight[:config.hidden_size, :]
        self_attn.query.bias.data = roberta_layer.self_attn.in_proj_bias[:config.hidden_size]
        self_attn.key.weight.data = roberta_layer.self_attn.in_proj_weight[config.hidden_size:2 * config.hidden_size, :]
        self_attn.key.bias.data = roberta_layer.self_attn.in_proj_bias[config.hidden_size:2 * config.hidden_size]
        self_attn.value.weight.data = roberta_layer.self_attn.in_proj_weight[2 * config.hidden_size:, :]
        self_attn.value.bias.data = roberta_layer.self_attn.in_proj_bias[2 * config.hidden_size:]

        ### self-attention output
        self_output: BertSelfOutput = layer.attention.output
        assert(
            self_output.dense.weight.shape == roberta_layer.self_attn.out_proj.weight.shape
        )
        self_output.dense.weight = roberta_layer.self_attn.out_proj.weight
        self_output.dense.bias = roberta_layer.self_attn.out_proj.bias
        self_output.LayerNorm.weight = roberta_layer.self_attn_layer_norm.weight
        self_output.LayerNorm.bias = roberta_layer.self_attn_layer_norm.bias
        self_output.LayerNorm.variance_epsilon = roberta_layer.self_attn_layer_norm.eps

        ### intermediate
        intermediate: BertIntermediate = layer.intermediate
        assert(
            intermediate.dense.weight.shape == roberta_layer.fc1.weight.shape
        )
        intermediate.dense.weight = roberta_layer.fc1.weight
        intermediate.dense.bias = roberta_layer.fc1.bias

        ### output
        bert_output: BertOutput = layer.output
        assert(
            bert_output.dense.weight.shape == roberta_layer.fc2.weight.shape
        )
        bert_output.dense.weight = roberta_layer.fc2.weight
        bert_output.dense.bias = roberta_layer.fc2.bias
        bert_output.LayerNorm.weight = roberta_layer.final_layer_norm.weight
        bert_output.LayerNorm.bias = roberta_layer.final_layer_norm.bias
        bert_output.LayerNorm.variance_epsilon = roberta_layer.final_layer_norm.eps
        #### end of layer

    if classification_head:
        model.classifier.dense.weight = roberta.model.classification_heads['mnli'].dense.weight
        model.classifier.dense.bias = roberta.model.classification_heads['mnli'].dense.bias
        model.classifier.out_proj.weight = roberta.model.classification_heads['mnli'].out_proj.weight
        model.classifier.out_proj.bias = roberta.model.classification_heads['mnli'].out_proj.bias
    else:
        # LM Head
        model.lm_head.dense.weight = roberta.model.decoder.lm_head.dense.weight
        model.lm_head.dense.bias = roberta.model.decoder.lm_head.dense.bias
        model.lm_head.layer_norm.weight = roberta.model.decoder.lm_head.layer_norm.weight
        model.lm_head.layer_norm.bias = roberta.model.decoder.lm_head.layer_norm.bias
        model.lm_head.layer_norm.variance_epsilon = roberta.model.decoder.lm_head.layer_norm.eps
        model.lm_head.decoder.weight = roberta.model.decoder.lm_head.weight
        model.lm_head.bias = roberta.model.decoder.lm_head.bias

    # Let's check that we get the same results.
    input_ids: torch.Tensor = roberta.encode(SAMPLE_TEXT).unsqueeze(0)  # batch of size 1

    our_output = model(input_ids)[0]
    if classification_head:
        their_output = roberta.model.classification_heads['mnli'](roberta.extract_features(input_ids))
    else:
        their_output = roberta.model(input_ids)[0]
    print(our_output.shape, their_output.shape)
    success = torch.allclose(our_output, their_output, atol=1e-3)
    print("Do both models output the same tensors?", "🔥" if success else "💩")
    if not success:
        raise Exception("Something went wRoNg")

    print(f"Saving model to {pytorch_dump_folder_path}")
    model.save_pretrained(pytorch_dump_folder_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    ## Required parameters
    parser.add_argument("--roberta_checkpoint_path",
                        default=None,
                        type=str,
                        required=True,
                        help="Path to the official PyTorch dump.")
    parser.add_argument("--pytorch_dump_folder_path",
                        default=None,
                        type=str,
                        required=True,
                        help="Path to the output PyTorch model.")
    parser.add_argument("--classification_head",
                        action="store_true",
                        help="Whether to convert a final classification head.")
    args = parser.parse_args()
    convert_roberta_checkpoint_to_pytorch(
        args.roberta_checkpoint_path,
        args.pytorch_dump_folder_path,
        args.classification_head
    )
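The trickiest step in this converter is splitting fairseq's fused attention projection. A minimal sketch with random weights (H is an illustrative hidden size, not a real model dimension): the (3*H, H) in_proj_weight stacks the query, key and value matrices row-wise, so plain slicing recovers the three (H, H) matrices that BERT's separate linear layers expect.

    import torch

    H = 8  # illustrative hidden size
    in_proj_weight = torch.randn(3 * H, H)  # fairseq fuses q/k/v into one matrix
    query_w = in_proj_weight[:H, :]
    key_w = in_proj_weight[H:2 * H, :]
    value_w = in_proj_weight[2 * H:, :]
    assert query_w.shape == key_w.shape == value_w.shape == (H, H)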
pytorch_transformers/convert_tf_checkpoint_to_pytorch.py
View file @ 0a2fecdf
...
@@ -47,7 +47,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--bert_config_file",
                         default=None,
                         type=str,
...
pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py
View file @ 0a2fecdf
...
@@ -24,11 +24,10 @@ from io import open
 import torch

 import pytorch_transformers.tokenization_transfo_xl as data_utils

-from pytorch_transformers.modeling_transfo_xl import (CONFIG_NAME,
-                                                      WEIGHTS_NAME,
-                                                      TransfoXLConfig,
-                                                      TransfoXLLMHeadModel,
+from pytorch_transformers import CONFIG_NAME, WEIGHTS_NAME
+from pytorch_transformers.modeling_transfo_xl import (TransfoXLConfig,
+                                                      TransfoXLLMHeadModel,
                                                       load_tf_weights_in_transfo_xl)
 from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME,
                                                           VOCAB_FILES_NAMES)

 if sys.version_info[0] == 2:
...
@@ -76,7 +75,7 @@ def convert_transfo_xl_checkpoint_to_pytorch(tf_checkpoint_path,
     if transfo_xl_config_file == "":
         config = TransfoXLConfig()
     else:
-        config = TransfoXLConfig(transfo_xl_config_file)
+        config = TransfoXLConfig.from_json_file(transfo_xl_config_file)
     print("Building PyTorch model from configuration: {}".format(str(config)))
     model = TransfoXLLMHeadModel(config)
...
pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py
View file @ 0a2fecdf
...
@@ -36,7 +36,7 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p
     model = chkpt['model']

     config = chkpt['params']
-    config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.Tensor, numpy.ndarray)))
+    config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))

     vocab = chkpt['dico_word2id']
     vocab = dict((s + '</w>' if s.find('@@') == -1 and i > 13 else s.replace('@@', ''), i) for s, i in vocab.items())
...
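A minimal sketch of the vocabulary rewrite in the last line above (toy entries only): XLM marks BPE continuations with a trailing '@@', while the converted vocabulary uses an explicit end-of-word marker '</w>' on ordinary tokens (indices above 13, i.e. past the special symbols).

    vocab = {'<s>': 0, 'hel@@': 20, 'lo': 21}  # toy vocabulary
    converted = dict((s + '</w>' if s.find('@@') == -1 and i > 13 else s.replace('@@', ''), i)
                     for s, i in vocab.items())
    print(converted)  # {'<s>': 0, 'hel': 20, 'lo</w>': 21}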
pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py
View file @ 0a2fecdf
...
@@ -79,7 +79,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--xlnet_config_file",
                         default=None,
                         type=str,
...
pytorch_transformers/file_utils.py
View file @ 0a2fecdf
...
@@ -17,8 +17,9 @@ from hashlib import sha256
 from io import open

 import boto3
-import requests
+from botocore.config import Config
 from botocore.exceptions import ClientError
+import requests
 from tqdm import tqdm

 try:
...
@@ -38,10 +39,13 @@ except ImportError:
 try:
     from pathlib import Path
     PYTORCH_PRETRAINED_BERT_CACHE = Path(
-        os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', default_cache_path))
+        os.getenv('PYTORCH_TRANSFORMERS_CACHE', os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
+                                                          default_cache_path)))
 except (AttributeError, ImportError):
-    PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
-                                              default_cache_path)
+    PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_TRANSFORMERS_CACHE',
+                                              os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
+                                                        default_cache_path))
+
+PYTORCH_TRANSFORMERS_CACHE = PYTORCH_PRETRAINED_BERT_CACHE  # Kept for backward compatibility

 logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
...
@@ -70,7 +74,7 @@ def filename_to_url(filename, cache_dir=None):
     Raise ``EnvironmentError`` if `filename` or its stored metadata do not exist.
     """
     if cache_dir is None:
-        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
+        cache_dir = PYTORCH_TRANSFORMERS_CACHE
     if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
...
@@ -90,15 +94,18 @@ def filename_to_url(filename, cache_dir=None):
     return url, etag


-def cached_path(url_or_filename, cache_dir=None):
+def cached_path(url_or_filename, cache_dir=None, force_download=False, proxies=None):
     """
     Given something that might be a URL (or might be a local path),
     determine which. If it's a URL, download the file and cache it, and
     return the path to the cached file. If it's already a local path,
     make sure the file exists and then return the path.
+    Args:
+        cache_dir: specify a cache directory to save the file to (overwrite the default cache dir).
+        force_download: if True, re-download the file even if it's already cached in the cache dir.
     """
     if cache_dir is None:
-        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
+        cache_dir = PYTORCH_TRANSFORMERS_CACHE
     if sys.version_info[0] == 3 and isinstance(url_or_filename, Path):
         url_or_filename = str(url_or_filename)
     if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
...
@@ -108,7 +115,7 @@ def cached_path(url_or_filename, cache_dir=None):
     if parsed.scheme in ('http', 'https', 's3'):
         # URL, so get it from the cache (downloading if necessary)
-        return get_from_cache(url_or_filename, cache_dir)
+        return get_from_cache(url_or_filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies)
     elif os.path.exists(url_or_filename):
         # File, and it exists.
         return url_or_filename
...
@@ -153,24 +160,24 @@ def s3_request(func):

 @s3_request
-def s3_etag(url):
+def s3_etag(url, proxies=None):
     """Check ETag on S3 object."""
-    s3_resource = boto3.resource("s3")
+    s3_resource = boto3.resource("s3", config=Config(proxies=proxies))
     bucket_name, s3_path = split_s3_path(url)
     s3_object = s3_resource.Object(bucket_name, s3_path)
     return s3_object.e_tag


 @s3_request
-def s3_get(url, temp_file):
+def s3_get(url, temp_file, proxies=None):
     """Pull a file directly from S3."""
-    s3_resource = boto3.resource("s3")
+    s3_resource = boto3.resource("s3", config=Config(proxies=proxies))
     bucket_name, s3_path = split_s3_path(url)
     s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file)


-def http_get(url, temp_file):
-    req = requests.get(url, stream=True)
+def http_get(url, temp_file, proxies=None):
+    req = requests.get(url, stream=True, proxies=proxies)
     content_length = req.headers.get('Content-Length')
     total = int(content_length) if content_length is not None else None
     progress = tqdm(unit="B", total=total)
...
@@ -181,13 +188,13 @@ def http_get(url, temp_file):
     progress.close()


-def get_from_cache(url, cache_dir=None):
+def get_from_cache(url, cache_dir=None, force_download=False, proxies=None):
     """
     Given a URL, look for the corresponding dataset in the local cache.
     If it's not there, download it. Then return the path to the cached file.
     """
     if cache_dir is None:
-        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
+        cache_dir = PYTORCH_TRANSFORMERS_CACHE
     if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
     if sys.version_info[0] == 2 and not isinstance(cache_dir, str):
...
@@ -198,10 +205,10 @@ def get_from_cache(url, cache_dir=None):
     # Get eTag to add to filename, if it exists.
     if url.startswith("s3://"):
-        etag = s3_etag(url)
+        etag = s3_etag(url, proxies=proxies)
     else:
         try:
-            response = requests.head(url, allow_redirects=True)
+            response = requests.head(url, allow_redirects=True, proxies=proxies)
             if response.status_code != 200:
                 etag = None
             else:
...
@@ -224,17 +231,17 @@ def get_from_cache(url, cache_dir=None):
     if matching_files:
         cache_path = os.path.join(cache_dir, matching_files[-1])

-    if not os.path.exists(cache_path):
+    if not os.path.exists(cache_path) or force_download:
         # Download to temporary file, then copy to cache dir once finished.
         # Otherwise you get corrupt cache entries if the download gets interrupted.
         with tempfile.NamedTemporaryFile() as temp_file:
-            logger.info("%s not found in cache, downloading to %s", url, temp_file.name)
+            logger.info("%s not found in cache or force_download set to True, downloading to %s", url, temp_file.name)

             # GET file object
             if url.startswith("s3://"):
-                s3_get(url, temp_file)
+                s3_get(url, temp_file, proxies=proxies)
             else:
-                http_get(url, temp_file)
+                http_get(url, temp_file, proxies=proxies)

             # we are copying the file before closing it, so flush to avoid truncation
             temp_file.flush()
...
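A minimal usage sketch of the extended download API (the URL and proxy address below are illustrative placeholders, not real endpoints): proxies takes the same dictionary format as the requests library, and force_download re-fetches a file even when it is already cached.

    from pytorch_transformers.file_utils import cached_path

    proxies = {'https': 'https://10.10.1.10:1080'}  # illustrative proxy only
    path = cached_path('https://example.com/vocab.txt',  # illustrative URL
                       force_download=True, proxies=proxies)
    print(path)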
pytorch_transformers/modeling_auto.py
0 → 100644
View file @ 0a2fecdf
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Auto Model class. """

from __future__ import absolute_import, division, print_function, unicode_literals

import logging

import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from torch.nn.parameter import Parameter

from .modeling_bert import BertConfig, BertModel
from .modeling_openai import OpenAIGPTConfig, OpenAIGPTModel
from .modeling_gpt2 import GPT2Config, GPT2Model
from .modeling_transfo_xl import TransfoXLConfig, TransfoXLModel
from .modeling_xlnet import XLNetConfig, XLNetModel
from .modeling_xlm import XLMConfig, XLMModel
from .modeling_roberta import RobertaConfig, RobertaModel
from .modeling_distilbert import DistilBertConfig, DistilBertModel

from .modeling_utils import PreTrainedModel, SequenceSummary

logger = logging.getLogger(__name__)


class AutoConfig(object):
    r""":class:`~pytorch_transformers.AutoConfig` is a generic configuration class
        that will be instantiated as one of the configuration classes of the library
        when created with the `AutoConfig.from_pretrained(pretrained_model_name_or_path)`
        class method.

        The `from_pretrained()` method takes care of returning the correct configuration class instance
        using pattern matching on the `pretrained_model_name_or_path` string.

        The configuration class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `distilbert`: DistilBertConfig (DistilBERT model)
            - contains `roberta`: RobertaConfig (RoBERTa model)
            - contains `bert`: BertConfig (Bert model)
            - contains `openai-gpt`: OpenAIGPTConfig (OpenAI GPT model)
            - contains `gpt2`: GPT2Config (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLConfig (Transformer-XL model)
            - contains `xlnet`: XLNetConfig (XLNet model)
            - contains `xlm`: XLMConfig (XLM model)

        This class cannot be instantiated using `__init__()` (it throws an error).
    """
    def __init__(self):
        raise EnvironmentError("AutoConfig is designed to be instantiated "
            "using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.")

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r""" Instantiate one of the configuration classes of the library
        from a pre-trained model configuration.

        The configuration class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `distilbert`: DistilBertConfig (DistilBERT model)
            - contains `roberta`: RobertaConfig (RoBERTa model)
            - contains `bert`: BertConfig (Bert model)
            - contains `openai-gpt`: OpenAIGPTConfig (OpenAI GPT model)
            - contains `gpt2`: GPT2Config (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLConfig (Transformer-XL model)
            - contains `xlnet`: XLNetConfig (XLNet model)
            - contains `xlm`: XLMConfig (XLM model)

        Params:
            **pretrained_model_name_or_path**: either:
                - a string with the `shortcut name` of a pre-trained model configuration to load from cache
                    or download and cache if not already stored in cache (e.g. 'bert-base-uncased').
                - a path to a `directory` containing a configuration file saved
                    using the `save_pretrained(save_directory)` method.
                - a path or url to a saved configuration `file`.
            **cache_dir**: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            **return_unused_kwargs**: (`optional`) bool:
                - If False, this function returns just the final configuration object.
                - If True, this function returns a tuple `(config, unused_kwargs)` where `unused_kwargs`
                    is a dictionary consisting of the key/value pairs whose keys are not configuration attributes:
                    i.e. the part of kwargs which has not been used to update `config` and is otherwise ignored.
            **kwargs**: (`optional`) dict:
                Dictionary of key/value pairs with which to update the configuration object after loading.
                - The values in kwargs of any keys which are configuration attributes will be used
                    to override the loaded values.
                - Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                    by the `return_unused_kwargs` keyword parameter.

        Examples::

            config = AutoConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
            config = AutoConfig.from_pretrained('./test/bert_saved_model/')  # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
            config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json')
            config = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True, foo=False)
            assert config.output_attention == True
            config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True,
                                                               foo=False, return_unused_kwargs=True)
            assert config.output_attention == True
            assert unused_kwargs == {'foo': False}

        """
        if 'distilbert' in pretrained_model_name_or_path:
            return DistilBertConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'roberta' in pretrained_model_name_or_path:
            return RobertaConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'bert' in pretrained_model_name_or_path:
            return BertConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'openai-gpt' in pretrained_model_name_or_path:
            return OpenAIGPTConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'gpt2' in pretrained_model_name_or_path:
            return GPT2Config.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'transfo-xl' in pretrained_model_name_or_path:
            return TransfoXLConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'xlnet' in pretrained_model_name_or_path:
            return XLNetConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'xlm' in pretrained_model_name_or_path:
            return XLMConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)

        raise ValueError("Unrecognized model identifier in {}. Should contain one of "
                         "'distilbert', 'roberta', 'bert', 'openai-gpt', 'gpt2', 'transfo-xl', "
                         "'xlnet', 'xlm'".format(pretrained_model_name_or_path))


class AutoModel(object):
    r""":class:`~pytorch_transformers.AutoModel` is a generic model class
        that will be instantiated as one of the base model classes of the library
        when created with the `AutoModel.from_pretrained(pretrained_model_name_or_path)`
        class method.

        The `from_pretrained()` method takes care of returning the correct model class instance
        using pattern matching on the `pretrained_model_name_or_path` string.

        The base model class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `distilbert`: DistilBertModel (DistilBERT model)
            - contains `roberta`: RobertaModel (RoBERTa model)
            - contains `bert`: BertModel (Bert model)
            - contains `openai-gpt`: OpenAIGPTModel (OpenAI GPT model)
            - contains `gpt2`: GPT2Model (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLModel (Transformer-XL model)
            - contains `xlnet`: XLNetModel (XLNet model)
            - contains `xlm`: XLMModel (XLM model)

        This class cannot be instantiated using `__init__()` (it throws an error).
    """
    def __init__(self):
        raise EnvironmentError("AutoModel is designed to be instantiated "
            "using the `AutoModel.from_pretrained(pretrained_model_name_or_path)` method.")

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r""" Instantiate one of the base model classes of the library
        from a pre-trained model configuration.

        The base model class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `distilbert`: DistilBertModel (DistilBERT model)
            - contains `roberta`: RobertaModel (RoBERTa model)
            - contains `bert`: BertModel (Bert model)
            - contains `openai-gpt`: OpenAIGPTModel (OpenAI GPT model)
            - contains `gpt2`: GPT2Model (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLModel (Transformer-XL model)
            - contains `xlnet`: XLNetModel (XLNet model)
            - contains `xlm`: XLMModel (XLM model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Params:
            **pretrained_model_name_or_path**: either:
                - a string with the `shortcut name` of a pre-trained model to load from cache
                    or download and cache if not already stored in cache (e.g. 'bert-base-uncased').
                - a path to a `directory` containing a configuration file saved
                    using the `save_pretrained(save_directory)` method.
                - a path or url to a tensorflow index checkpoint `file` (e.g. `./tf_model/model.ckpt.index`).
                    In this case, ``from_tf`` should be set to True and a configuration object should be
                    provided as `config` argument. This loading option is slower than converting the TensorFlow
                    checkpoint in a PyTorch model using the provided conversion scripts and loading
                    the PyTorch model afterwards.
            **model_args**: (`optional`) Sequence:
                All remaining positional arguments will be passed to the underlying model's __init__ function.
            **config**: an optional configuration for the model to use instead of an automatically loaded configuration.
                Configuration can be automatically loaded when:
                - the model is a model provided by the library (loaded with a `shortcut name` of a pre-trained model), or
                - the model was saved using the `save_pretrained(save_directory)` method (loaded by supplying the save directory).
            **state_dict**: an optional state dictionary for the model to use instead of a state dictionary loaded
                from saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using `save_pretrained(dir)` and `from_pretrained(save_directory)` is not
                a simpler option.
            **cache_dir**: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            **output_loading_info**: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
            **kwargs**: (`optional`) dict:
                Dictionary of key, values to update the configuration object after loading.
                Can be used to override selected configuration parameters. E.g. ``output_attention=True``.
                - If a configuration is provided with `config`, **kwargs will be directly passed
                    to the underlying model's __init__ method.
                - If a configuration is not provided, **kwargs will be first passed to the pretrained
                    model configuration class loading function (`PretrainedConfig.from_pretrained`).
                    Each key of **kwargs that corresponds to a configuration attribute
                    will be used to override said attribute with the supplied **kwargs value.
                    Remaining keys that do not correspond to any configuration attribute will
                    be passed to the underlying model's __init__ function.

        Examples::

            model = AutoModel.from_pretrained('bert-base-uncased')  # Download model and configuration from S3 and cache.
            model = AutoModel.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
            model = AutoModel.from_pretrained('bert-base-uncased', output_attention=True)  # Update configuration during loading
            assert model.config.output_attention == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)

        """
        if 'distilbert' in pretrained_model_name_or_path:
            return DistilBertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'roberta' in pretrained_model_name_or_path:
            return RobertaModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'bert' in pretrained_model_name_or_path:
            return BertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'openai-gpt' in pretrained_model_name_or_path:
            return OpenAIGPTModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'gpt2' in pretrained_model_name_or_path:
            return GPT2Model.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'transfo-xl' in pretrained_model_name_or_path:
            return TransfoXLModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'xlnet' in pretrained_model_name_or_path:
            return XLNetModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'xlm' in pretrained_model_name_or_path:
            return XLMModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)

        raise ValueError("Unrecognized model identifier in {}. Should contain one of "
                         "'distilbert', 'roberta', 'bert', 'openai-gpt', 'gpt2', 'transfo-xl', "
                         "'xlnet', 'xlm'".format(pretrained_model_name_or_path))
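Because dispatch is plain substring matching, the order of the checks matters: 'distilbert-base-uncased' also contains 'bert', so the 'distilbert' branch must be tested first. A minimal usage sketch (the names below are standard shortcut names; weights are downloaded on first use):

    from pytorch_transformers.modeling_auto import AutoConfig, AutoModel

    config = AutoConfig.from_pretrained('distilbert-base-uncased')  # resolves to DistilBertConfig, not BertConfig
    model = AutoModel.from_pretrained('bert-base-uncased')          # resolves to BertModel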