chenpangpang / transformers · Commits

Commit 0a2fecdf (unverified)
Merge branch 'master' into master
Authored by Thomas Wolf on Aug 30, 2019; committed by GitHub on Aug 30, 2019
Parents: 39eb31e1, e0caab0c
Changes: 98 files. Showing 20 changed files with 841 additions and 382 deletions (+841, -382).
examples/single_model_scripts/run_openai_gpt.py (+1 -1)
examples/test_examples.py (+1 -1)
examples/utils_glue.py (+20 -9)
hubconfs/bert_hubconf.py (+104 -104)
hubconfs/gpt2_hubconf.py (+42 -42)
hubconfs/gpt_hubconf.py (+39 -39)
hubconfs/transformer_xl_hubconf.py (+35 -35)
hubconfs/xlm_hubconf.py (+40 -40)
hubconfs/xlnet_hubconf.1.py (+42 -42)
pytorch_transformers/__init__.py (+24 -13)
pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py (+2 -2)
pytorch_transformers/convert_openai_checkpoint_to_pytorch.py (+2 -2)
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py (+21 -21)
pytorch_transformers/convert_roberta_checkpoint_to_pytorch.py (+181 -0)
pytorch_transformers/convert_tf_checkpoint_to_pytorch.py (+1 -1)
pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py (+5 -6)
pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py (+1 -1)
pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py (+1 -1)
pytorch_transformers/file_utils.py (+29 -22)
pytorch_transformers/modeling_auto.py (+250 -0)
examples/single_model_scripts/run_openai_gpt.py

```diff
@@ -205,7 +205,7 @@ def main():
     param_optimizer = list(model.named_parameters())
     no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
     optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
```
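The change above stops hard-coding the decay rate at 0.01 and instead threads the script's `--weight_decay` argument into the decayed parameter group. A minimal, self-contained sketch of the pattern (the `Tiny` module and the constants are hypothetical stand-ins; `AdamW` is taken from `torch.optim` rather than the library's own implementation, but the grouping mechanism is the same):

```python
import torch
from torch.optim import AdamW

class Tiny(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.dense = torch.nn.Linear(4, 4)
        self.LayerNorm = torch.nn.LayerNorm(4)

model = Tiny()
weight_decay = 0.01  # stands in for args.weight_decay
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
param_optimizer = list(model.named_parameters())
optimizer_grouped_parameters = [
    # decayed group: dense.weight
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': weight_decay},
    # un-decayed group: dense.bias, LayerNorm.weight, LayerNorm.bias
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-4, eps=1e-8)
```

Per-group `weight_decay` is how AdamW lets biases and LayerNorm parameters opt out of regularization while everything else decays at the configured rate.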
examples/test_examples.py

```diff
@@ -81,7 +81,7 @@ class ExamplesTests(unittest.TestCase):
             "--do_train",
             "--do_eval",
             "--version_2_with_negative",
-            "--learning_rate=1e-4",
+            "--learning_rate=2e-4",
             "--per_gpu_train_batch_size=2",
             "--per_gpu_eval_batch_size=1",
             "--overwrite_output_dir",
```
examples/utils_glue.py

This is the substantive change of the merge: `convert_examples_to_features` gains a `sep_token_extra` flag so RoBERTa's input format (an extra separator between sentence pairs) can be produced, and the keyword arguments are regrouped. The line grouping of the old signature below is reconstructed from the rendered tokens.

```diff
@@ -390,10 +390,16 @@ class WnliProcessor(DataProcessor):
 def convert_examples_to_features(examples, label_list, max_seq_length,
                                  tokenizer, output_mode,
-                                 cls_token_at_end=False, pad_on_left=False,
-                                 cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
-                                 sequence_a_segment_id=0, sequence_b_segment_id=1,
-                                 cls_token_segment_id=1, pad_token_segment_id=0,
+                                 cls_token_at_end=False,
+                                 cls_token='[CLS]',
+                                 cls_token_segment_id=1,
+                                 sep_token='[SEP]',
+                                 sep_token_extra=False,
+                                 pad_on_left=False,
+                                 pad_token=0,
+                                 pad_token_segment_id=0,
+                                 sequence_a_segment_id=0,
+                                 sequence_b_segment_id=1,
                                  mask_padding_with_zero=True):
     """ Loads a data file into a list of `InputBatch`s
         `cls_token_at_end` define the location of the CLS token:
@@ -416,12 +422,14 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
             tokens_b = tokenizer.tokenize(example.text_b)

             # Modifies `tokens_a` and `tokens_b` in place so that the total
             # length is less than the specified length.
-            # Account for [CLS], [SEP], [SEP] with "- 3"
-            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
+            # Account for [CLS], [SEP], [SEP] with "- 3". " -4" for RoBERTa.
+            special_tokens_count = 4 if sep_token_extra else 3
+            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
         else:
-            # Account for [CLS] and [SEP] with "- 2"
-            if len(tokens_a) > max_seq_length - 2:
-                tokens_a = tokens_a[:(max_seq_length - 2)]
+            # Account for [CLS] and [SEP] with "- 2" and with "- 3" for RoBERTa.
+            special_tokens_count = 3 if sep_token_extra else 2
+            if len(tokens_a) > max_seq_length - special_tokens_count:
+                tokens_a = tokens_a[:(max_seq_length - special_tokens_count)]

         # The convention in BERT is:
         # (a) For sequence pairs:
@@ -442,6 +450,9 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
         # used as as the "sentence vector". Note that this only makes sense because
         # the entire model is fine-tuned.
         tokens = tokens_a + [sep_token]
+        if sep_token_extra:
+            # roberta uses an extra separator b/w pairs of sentences
+            tokens += [sep_token]
         segment_ids = [sequence_a_segment_id] * len(tokens)

         if tokens_b:
```
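A small sketch (hypothetical helper name) of the accounting these hunks introduce: a RoBERTa-style pair is `<s> A </s></s> B </s>`, one separator more than BERT's `[CLS] A [SEP] B [SEP]`, so one extra slot has to be reserved before truncation:

```python
def budget_for_pair(max_seq_length: int, sep_token_extra: bool) -> int:
    # BERT reserves [CLS] + [SEP] + [SEP] = 3 slots; RoBERTa needs 4.
    special_tokens_count = 4 if sep_token_extra else 3
    return max_seq_length - special_tokens_count

assert budget_for_pair(128, sep_token_extra=False) == 125  # BERT
assert budget_for_pair(128, sep_token_extra=True) == 124   # RoBERTa
```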
hubconfs/bert_hubconf.py

```diff
@@ -37,7 +37,7 @@ bert_docstring = """
                       checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models
                    will be cached.
-        state_dict: an optional state dictionnary
+        state_dict: an optional state dictionary
                     (collections.OrderedDict object) to use instead of Google
                     pre-trained models
         *inputs, **kwargs: additional input for the specific Bert class
```

The remaining hunks (@@ -84,12 @@ through @@ -337,24 @@) make one uniform change: every example line in the entry-point docstrings gains a `>>> ` doctest prompt. The implementation lines around each docstring (`tokenizer = BertTokenizer.from_pretrained(*args, **kwargs)`, `model = ….from_pretrained(*args, **kwargs)`, `return model`) appear only as unchanged context. After the change, the examples read (identical tokenized-input preparation blocks are collapsed to a comment here):

```python
# bertTokenizer
>>> import torch
>>> sentence = 'Hello, World!'
>>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
>>> toks = tokenizer.tokenize(sentence)
['Hello', '##,', 'World', '##!']
>>> ids = tokenizer.convert_tokens_to_ids(toks)
[8667, 28136, 1291, 28125]

# bertModel
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
# Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens])
>>> segments_tensors = torch.tensor([segments_ids])
# Load bertModel
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
>>> model.eval()
# Predict hidden states features for each layer
>>> with torch.no_grad():
        encoded_layers, _ = model(tokens_tensor, segments_tensors)

# bertForNextSentencePrediction  (preparation as in bertModel)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
>>> model.eval()
# Predict the next sentence classification logits
>>> with torch.no_grad():
        next_sent_classif_logits = model(tokens_tensor, segments_tensors)

# bertForPreTraining  (preparation as in bertModel)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
>>> masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)

# bertForMaskedLM  (preparation as in bertModel, plus masking one token)
>>> masked_index = 8
>>> tokenized_text[masked_index] = '[MASK]'
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
>>> model.eval()
# Predict all tokens
>>> with torch.no_grad():
        predictions = model(tokens_tensor, segments_tensors)
>>> predicted_index = torch.argmax(predictions[0, masked_index]).item()
>>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
'henson'

# bertForSequenceClassification  (preparation as in bertModel)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
>>> model.eval()
# Predict the sequence classification logits
>>> with torch.no_grad():
        seq_classif_logits = model(tokens_tensor, segments_tensors)
# Or get the sequence classification loss
>>> labels = torch.tensor([1])
>>> seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss

# bertForMultipleChoice  (inputs gain a choices dimension)
>>> tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
>>> segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
>>> model.eval()
# Predict the multiple choice logits
>>> with torch.no_grad():
        multiple_choice_logits = model(tokens_tensor, segments_tensors)
# Or get the multiple choice loss
>>> labels = torch.tensor([1])
>>> multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss

# bertForQuestionAnswering  (preparation as in bertModel)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
>>> model.eval()
# Predict the start and end positions logits
>>> with torch.no_grad():
        start_logits, end_logits = model(tokens_tensor, segments_tensors)
# Or get the total loss which is the sum of the CrossEntropy loss for the start and end token positions
>>> start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
# set model.train() before if training this loss
>>> multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)

# bertForTokenClassification  (preparation as in bertModel)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
>>> model.eval()
# Predict the token classification logits
>>> with torch.no_grad():
        classif_logits = model(tokens_tensor, segments_tensors)
# Or get the token classification loss
>>> labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
>>> classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
```
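For context: files like bert_hubconf.py define the entry points that `torch.hub` imports by name, and these docstrings are exactly what `torch.hub.help()` prints, which is presumably why they are being normalized to doctest format. A sketch, assuming the 'huggingface/pytorch-transformers' hub repo referenced in the examples still resolves:

```python
import torch

# Print the (now doctest-formatted) docstring of a hub entry point.
print(torch.hub.help('huggingface/pytorch-transformers', 'bertTokenizer'))

# Instantiate the entry point exactly as the examples do.
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer',
                           'bert-base-cased', do_basic_tokenize=False)
print(tokenizer.tokenize('Hello, World!'))
```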
hubconfs/gpt2_hubconf.py

All four hunks (@@ -52,11 @@, @@ -71,24 @@, @@ -104,31 @@, @@ -143,25 @@) add `>>> ` doctest prompts to the docstring examples; the `GPT2Tokenizer.from_pretrained(*args, **kwargs)` / `return tokenizer` / `GPT2LMHeadModel.from_pretrained(...)` lines appear only as unchanged context. After the change:

```python
# gpt2Tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
>>> text = "Who was Jim Henson ?"
>>> indexed_tokens = tokenizer.encode(tokenized_text)

# gpt2Model
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> indexed_tokens_1 = tokenizer.encode(text_1)
>>> indexed_tokens_2 = tokenizer.encode(text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load gpt2Model
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
>>> model.eval()
# Predict hidden states features for each layer
# past can be used to reuse precomputed hidden state in a subsequent predictions
>>> with torch.no_grad():
        hidden_states_1, past = model(tokens_tensor_1)
        hidden_states_2, past = model(tokens_tensor_2, past=past)

# gpt2LMHeadModel  (preparation as in gpt2Model)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
>>> model.eval()
>>> with torch.no_grad():
        predictions_1, past = model(tokens_tensor_1)
        predictions_2, past = model(tokens_tensor_2, past=past)
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
>>> predicted_token = tokenizer.decode([predicted_index])
>>> assert predicted_token == ' who'

# gpt2DoubleHeadsModel
>>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
>>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
>>> tokenized_text1 = tokenizer.tokenize(text1)
>>> tokenized_text2 = tokenizer.tokenize(text2)
>>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
>>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
>>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
>>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# Load gpt2DoubleHeadsModel
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
>>> model.eval()
>>> with torch.no_grad():
        lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids)
```
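The `past` return value in these examples caches each layer's attention keys and values so a follow-up call only has to process the new tokens. A greedy-decoding sketch under the 2019-era pytorch-transformers API shown above (tuple returns, `past=` keyword); this is not the current transformers interface:

```python
import torch

tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
model.eval()

generated = tokenizer.encode("Who was Jim Henson ?")
context = torch.tensor([generated])
past = None
with torch.no_grad():
    for _ in range(5):                           # generate 5 tokens greedily
        logits, past = model(context, past=past)
        next_token = torch.argmax(logits[0, -1, :]).item()
        generated.append(next_token)
        context = torch.tensor([[next_token]])   # only the new token is fed next step
print(tokenizer.decode(generated))
```

Feeding `past` back turns each step into O(1) new-token work instead of re-encoding the whole prefix.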
hubconfs/gpt_hubconf.py

```diff
@@ -40,7 +40,7 @@ gpt_docstring = """
             . a series of NumPy files containing OpenAI TensorFlow trained weights
         from_tf: should we load the weights from a locally saved TensorFlow checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-        state_dict: an optional state dictionnary (collections.OrderedDict object)
+        state_dict: an optional state dictionary (collections.OrderedDict object)
                     to use instead of pre-trained models
         *inputs, **kwargs: additional input for the specific OpenAI-GPT class
 """
```

The other hunks (@@ -76,12 @@ through @@ -161,25 @@) add `>>> ` doctest prompts throughout; the `from_pretrained(*args, **kwargs)` implementation lines are unchanged context. After the change:

```python
# openAIGPTTokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
>>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
>>> tokenized_text = tokenizer.tokenize(text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
[763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483]

# openAIGPTModel  (preparation as above)
>>> tokens_tensor = torch.tensor([indexed_tokens])
# Load openAIGPTModel
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
>>> model.eval()
# Predict hidden states features for each layer
>>> with torch.no_grad():
        hidden_states = model(tokens_tensor)

# openAIGPTLMHeadModel  (preparation as above)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
>>> model.eval()
>>> with torch.no_grad():
        predictions = model(tokens_tensor)
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions[0, -1, :]).item()
>>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
'.</w>'

# openAIGPTDoubleHeadsModel
>>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
>>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
>>> tokenized_text1 = tokenizer.tokenize(text1)
>>> tokenized_text2 = tokenizer.tokenize(text2)
>>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
>>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
>>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
>>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# Load openAIGPTDoubleHeadsModel
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
>>> model.eval()
>>> with torch.no_grad():
        lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids)
```
hubconfs/transformer_xl_hubconf.py

```diff
@@ -23,7 +23,7 @@ transformer_xl_docstring = """
             . `model.chkpt` a TensorFlow checkpoint
         from_tf: should we load the weights from a locally saved TensorFlow checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-        state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
+        state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
         *inputs, **kwargs: additional input for the specific TransformerXL class
 """
```

The other hunks (@@ -45,12 @@, @@ -63,26 @@, @@ -98,33 @@) add `>>> ` doctest prompts; `TransfoXLTokenizer.from_pretrained(...)` / `TransfoXLLMHeadModel.from_pretrained(...)` / `return` lines are unchanged context. After the change:

```python
# transformerXLTokenizer  (vocabulary: * transfo-xl-wt103)
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
>>> text = "Who was Jim Henson ?"
>>> tokenized_text = tokenizer.tokenize(tokenized_text)
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# transformerXLModel
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> tokenized_text_1 = tokenizer.tokenize(text_1)
>>> tokenized_text_2 = tokenizer.tokenize(text_2)
>>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
>>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# Load transformerXLModel
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
>>> model.eval()
# Predict hidden states features for each layer
# We can re-use the memory cells in a subsequent call to attend a longer context
>>> with torch.no_grad():
        hidden_states_1, mems_1 = model(tokens_tensor_1)
        hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)

# transformerXLLMHeadModel  (preparation as in transformerXLModel)
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
>>> model.eval()
>>> with torch.no_grad():
        predictions_1, mems_1 = model(tokens_tensor_1)
        predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
>>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
>>> assert predicted_token == 'who'
```
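Same idea as GPT-2's `past`, but segment-level: Transformer-XL returns `mems`, per-layer caches of hidden states, and a later call can attend to them, extending the effective context beyond one segment. A short sketch under the API the examples above assume:

```python
import torch

tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
model.eval()

seg_1 = torch.tensor([tokenizer.convert_tokens_to_ids(tokenizer.tokenize("Who was Jim Henson ?"))])
seg_2 = torch.tensor([tokenizer.convert_tokens_to_ids(tokenizer.tokenize("Jim Henson was a puppeteer"))])
with torch.no_grad():
    predictions_1, mems = model(seg_1)             # mems: list of per-layer hidden-state caches
    predictions_2, mems = model(seg_2, mems=mems)  # segment 2 attends to segment 1's cache
```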
hubconfs/xlm_hubconf.py

The hunks (@@ -17,16 @@, @@ -76,11 @@, @@ -91,11 @@, @@ -108,26 @@, @@ -142,25 @@) add `>>> ` doctest prompts throughout, including inside the commented-out example; `XLMTokenizer.from_pretrained(...)` / `XLMWithLMHeadModel.from_pretrained(...)` / `return` lines and the `def xlmModel(*args, **kwargs):` / `def xlmLMHeadModel(*args, **kwargs):` signatures are unchanged context. After the change:

```python
# Shared example preparation (xlm_start_docstring)
# Load the tokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
# Prepare tokenized input
>>> text_1 = "Who was Jim Henson ?"
>>> text_2 = "Jim Henson was a puppeteer"
>>> indexed_tokens_1 = tokenizer.encode(text_1)
>>> indexed_tokens_2 = tokenizer.encode(text_2)
>>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
>>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
# A lot of models share the same param doc. Use a decorator

# xlmTokenizer
>>> import torch
>>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
>>> text = "Who was Jim Henson ?"
>>> indexed_tokens = tokenizer.encode(tokenized_text)

# xlmModel
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
>>> model.eval()
# Predict hidden states features for each layer
>>> with torch.no_grad():
        hidden_states_1, mems = model(tokens_tensor_1)
        hidden_states_2, mems = model(tokens_tensor_2, past=mems)

# xlmLMHeadModel  (preparation as above)
# Load xlnetLMHeadModel
>>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
>>> model.eval()
>>> with torch.no_grad():
        predictions_1, mems = model(tokens_tensor_1)
        predictions_2, mems = model(tokens_tensor_2, mems=mems)
# Get the predicted last token
>>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
>>> predicted_token = tokenizer.decode([predicted_index])
>>> assert predicted_token == ' who'

# Commented-out example for an entry point not yet exposed here:
# Example:
# # Load the tokenizer
# >>> import torch
# >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
# # Prepare tokenized input
# >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
# >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
# >>> tokenized_text1 = tokenizer.tokenize(text1)
# >>> tokenized_text2 = tokenizer.tokenize(text2)
# >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
# >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
# >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
# >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
# # Load xlnetForSequenceClassification
# >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
# >>> model.eval()
# # Predict sequence classes logits
# >>> with torch.no_grad():
#         lm_logits, mems = model(tokens_tensor)
# """
# model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
```
hubconfs/xlnet_hubconf.1.py (View file @ 0a2fecdf)
...
@@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs):
         Default: None

     Example:
-        >>> import torch
+        import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
-        >>> text = "Who was Jim Henson ?"
+        text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = XLNetTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer
...
@@ -72,23 +72,23 @@ def xlnetModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
+        import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
+        text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
+        text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
+        indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             hidden_states_1, mems = model(tokens_tensor_1)
             hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """
...
@@ -106,30 +106,30 @@ def xlnetLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
+        import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
+        text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
+        text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
+        indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
             predictions_1, mems = model(tokens_tensor_1)
             predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
+        predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        assert predicted_token == ' who'
     """
     model = XLNetLMHeadModel.from_pretrained(*args, **kwargs)
     return model
...
@@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs):
     # Example:
     # # Load the tokenizer
-    # >>> import torch
+    # import torch
-    # >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+    # tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
     # # Prepare tokenized input
-    # >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+    # text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-    # >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+    # text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-    # >>> tokenized_text1 = tokenizer.tokenize(text1)
+    # tokenized_text1 = tokenizer.tokenize(text1)
-    # >>> tokenized_text2 = tokenizer.tokenize(text2)
+    # tokenized_text2 = tokenizer.tokenize(text2)
-    # >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+    # indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-    # >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+    # indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-    # >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+    # tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-    # >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+    # mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
     # # Load xlnetForSequenceClassification
-    # >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
+    # model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
-    # >>> model.eval()
+    # model.eval()
     # # Predict sequence classes logits
-    # >>> with torch.no_grad():
+    # with torch.no_grad():
    #         lm_logits, mems = model(tokens_tensor)
     # """
     # model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
...
pytorch_transformers/__init__.py (View file @ 0a2fecdf)

-__version__ = "1.0.0"
+__version__ = "1.1.0"
+from .tokenization_auto import AutoTokenizer
 from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
 from .tokenization_openai import OpenAIGPTTokenizer
 from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
 from .tokenization_gpt2 import GPT2Tokenizer
 from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
 from .tokenization_xlm import XLMTokenizer
-from .tokenization_utils import (PreTrainedTokenizer, clean_up_tokenization)
+from .tokenization_roberta import RobertaTokenizer
+from .tokenization_distilbert import DistilBertTokenizer
+from .tokenization_utils import (PreTrainedTokenizer)

+from .modeling_auto import (AutoConfig, AutoModel)
-from .modeling_bert import (BertConfig, BertModel, BertForPreTraining,
-                            BertForMaskedLM, BertForNextSentencePrediction,
-                            BertForSequenceClassification, BertForMultipleChoice,
-                            BertForTokenClassification, BertForQuestionAnswering,
-                            load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
-                            BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)
+from .modeling_bert import (BertConfig, BertPreTrainedModel, BertModel, BertForPreTraining,
+                            BertForMaskedLM, BertForNextSentencePrediction,
+                            BertForSequenceClassification, BertForMultipleChoice,
+                            BertForTokenClassification, BertForQuestionAnswering,
+                            load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+                            BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)
-from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTModel,
+from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTPreTrainedModel, OpenAIGPTModel,
                               OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel,
                               load_tf_weights_in_openai_gpt, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
                               OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel,
+from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLPreTrainedModel, TransfoXLModel, TransfoXLLMHeadModel,
                                   load_tf_weights_in_transfo_xl, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
                                   TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_gpt2 import (GPT2Config, GPT2Model,
+from .modeling_gpt2 import (GPT2Config, GPT2PreTrainedModel, GPT2Model,
                             GPT2LMHeadModel, GPT2DoubleHeadsModel,
                             load_tf_weights_in_gpt2, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
                             GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
...
@@ -29,14 +35,19 @@ from .modeling_xlnet import (XLNetConfig,
                              XLNetForSequenceClassification, XLNetForQuestionAnswering,
                              load_tf_weights_in_xlnet, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
                              XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_xlm import (XLMConfig, XLMModel,
+from .modeling_xlm import (XLMConfig, XLMPreTrainedModel, XLMModel,
                            XLMWithLMHeadModel, XLMForSequenceClassification,
                            XLMForQuestionAnswering, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
                            XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
+from .modeling_roberta import (RobertaConfig, RobertaForMaskedLM, RobertaModel, RobertaForSequenceClassification,
+                               ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
+from .modeling_distilbert import (DistilBertConfig, DistilBertForMaskedLM, DistilBertModel,
+                                  DistilBertForSequenceClassification, DistilBertForQuestionAnswering,
+                                  DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
 from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
                              PretrainedConfig, PreTrainedModel, prune_layer, Conv1D)
 from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule, WarmupCosineSchedule,
                            WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
-from .file_utils import (PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
+from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
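
Taken together, these `__init__.py` changes mean the release's new classes are importable straight from the package root. A minimal usage sketch, assuming pytorch-transformers 1.1.0 and the library's standard shortcut names ('bert-base-uncased', 'roberta-base'):

from pytorch_transformers import AutoTokenizer, AutoModel, RobertaModel

# The new Auto* entry points dispatch on the checkpoint name (see modeling_auto.py below).
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')

# RoBERTa is exported from the package root for the first time in this release.
roberta = RobertaModel.from_pretrained('roberta-base')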
pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py (View file @ 0a2fecdf)
...
@@ -35,7 +35,7 @@ def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, p
     if gpt2_config_file == "":
         config = GPT2Config()
     else:
-        config = GPT2Config(gpt2_config_file)
+        config = GPT2Config.from_json_file(gpt2_config_file)
     model = GPT2Model(config)

     # Load weights from numpy
...
@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default=None,
                         type=str,
...
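
The one-line fix in this converter (and in the OpenAI GPT, Transformer-XL, and TF converters below) swaps the config constructor for the explicit JSON classmethod. A hedged sketch of the corrected call, with a hypothetical config path:

from pytorch_transformers import GPT2Config

# Parse a JSON config file explicitly instead of passing the path to the constructor.
config = GPT2Config.from_json_file('/path/to/gpt2_config.json')  # hypothetical path
print(config)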
pytorch_transformers/convert_openai_checkpoint_to_pytorch.py (View file @ 0a2fecdf)
...
@@ -35,7 +35,7 @@ def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_c
     if openai_config_file == "":
         config = OpenAIGPTConfig()
     else:
-        config = OpenAIGPTConfig(openai_config_file)
+        config = OpenAIGPTConfig.from_json_file(openai_config_file)
     model = OpenAIGPTModel(config)

     # Load weights from numpy
...
@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default=None,
                         type=str,
...
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py (View file @ 0a2fecdf)
...
@@ -20,7 +20,7 @@ import argparse
 import torch
 import numpy as np
 import tensorflow as tf
-from pytorch_pretrained_bert.modeling import BertModel
+from pytorch_transformers.modeling import BertModel


 def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:str):
...
@@ -41,7 +41,7 @@ def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:s
     N BertForQuestionAnswering
     """

-    tensors_to_transopse = (
+    tensors_to_transpose = (
         "dense.weight",
         "attention.self.query",
         "attention.self.key",
...
@@ -62,34 +62,34 @@ def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:s
     if not os.path.isdir(ckpt_dir):
         os.makedirs(ckpt_dir)

-    session = tf.Session()
     state_dict = model.state_dict()
-    tf_vars = []

     def to_tf_var_name(name:str):
         for patt, repl in iter(var_map):
             name = name.replace(patt, repl)
         return 'bert/{}'.format(name)

-    def assign_tf_var(tensor:np.ndarray, name:str):
-        tmp_var = tf.Variable(initial_value=tensor)
-        tf_var = tf.get_variable(dtype=tmp_var.dtype, shape=tmp_var.shape, name=name)
-        op = tf.assign(ref=tf_var, value=tmp_var)
-        session.run(tf.variables_initializer([tmp_var, tf_var]))
-        session.run(fetches=[op, tf_var])
+    def create_tf_var(tensor:np.ndarray, name:str, session:tf.Session):
+        tf_dtype = tf.dtypes.as_dtype(tensor.dtype)
+        tf_var = tf.get_variable(dtype=tf_dtype, shape=tensor.shape, name=name, initializer=tf.zeros_initializer())
+        session.run(tf.variables_initializer([tf_var]))
+        session.run(tf_var)
         return tf_var

-    for var_name in state_dict:
-        tf_name = to_tf_var_name(var_name)
-        torch_tensor = state_dict[var_name].numpy()
-        if any([x in var_name for x in tensors_to_transopse]):
-            torch_tensor = torch_tensor.T
-        tf_tensor = assign_tf_var(tensor=torch_tensor, name=tf_name)
-        tf_vars.append(tf_tensor)
-        print("{0}{1}initialized".format(tf_name, " " * (60 - len(tf_name))))
-
-    saver = tf.train.Saver(tf_vars)
-    saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt"))
+    tf.reset_default_graph()
+    with tf.Session() as session:
+        for var_name in state_dict:
+            tf_name = to_tf_var_name(var_name)
+            torch_tensor = state_dict[var_name].numpy()
+            if any([x in var_name for x in tensors_to_transpose]):
+                torch_tensor = torch_tensor.T
+            tf_var = create_tf_var(tensor=torch_tensor, name=tf_name, session=session)
+            tf.keras.backend.set_value(tf_var, torch_tensor)
+            tf_weight = session.run(tf_var)
+            print("Successfully created {}: {}".format(tf_name, np.allclose(tf_weight, torch_tensor)))
+
+        saver = tf.train.Saver(tf.trainable_variables())
+        saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt"))


 def main(raw_args=None):
...
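
The rewritten loop resets the default graph once, creates each TensorFlow variable inside a single session, copies the NumPy weights in with `tf.keras.backend.set_value`, and finally snapshots every trainable variable in one Saver call. A self-contained sketch of that pattern on a toy tensor (TF 1.x graph mode assumed; the variable name and output path are illustrative, not from a real checkpoint):

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
with tf.Session() as session:
    torch_tensor = np.ones((2, 3), dtype=np.float32)   # stand-in for state_dict[var_name].numpy()
    tf_dtype = tf.dtypes.as_dtype(torch_tensor.dtype)
    tf_var = tf.get_variable(dtype=tf_dtype, shape=torch_tensor.shape, name="bert/demo",
                             initializer=tf.zeros_initializer())
    session.run(tf.variables_initializer([tf_var]))    # create the variable in the graph
    tf.keras.backend.set_value(tf_var, torch_tensor)   # copy the numpy weights in
    assert np.allclose(session.run(tf_var), torch_tensor)
    saver = tf.train.Saver(tf.trainable_variables())   # save all variables at once
    saver.save(session, "/tmp/demo.ckpt")              # hypothetical output path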
pytorch_transformers/convert_roberta_checkpoint_to_pytorch.py (new file, 0 → 100644) (View file @ 0a2fecdf)

# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert RoBERTa checkpoint."""

from __future__ import absolute_import, division, print_function

import argparse
import logging

import numpy as np
import torch

from fairseq.models.roberta import RobertaModel as FairseqRobertaModel
from fairseq.modules import TransformerSentenceEncoderLayer
from pytorch_transformers.modeling_bert import (BertConfig, BertEncoder,
                                                BertIntermediate, BertLayer,
                                                BertModel, BertOutput,
                                                BertSelfAttention,
                                                BertSelfOutput)
from pytorch_transformers.modeling_roberta import (RobertaEmbeddings,
                                                   RobertaForMaskedLM,
                                                   RobertaForSequenceClassification,
                                                   RobertaModel)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

SAMPLE_TEXT = 'Hello world! cécé herlolip'


def convert_roberta_checkpoint_to_pytorch(roberta_checkpoint_path, pytorch_dump_folder_path, classification_head):
    """
    Copy/paste/tweak roberta's weights to our BERT structure.
    """
    roberta = FairseqRobertaModel.from_pretrained(roberta_checkpoint_path)
    roberta.eval()  # disable dropout
    config = BertConfig(
        vocab_size_or_config_json_file=50265,
        hidden_size=roberta.args.encoder_embed_dim,
        num_hidden_layers=roberta.args.encoder_layers,
        num_attention_heads=roberta.args.encoder_attention_heads,
        intermediate_size=roberta.args.encoder_ffn_embed_dim,
        max_position_embeddings=514,
        type_vocab_size=1,
    )
    if classification_head:
        config.num_labels = roberta.args.num_classes
    print("Our BERT config:", config)

    model = RobertaForSequenceClassification(config) if classification_head else RobertaForMaskedLM(config)
    model.eval()

    # Now let's copy all the weights.
    # Embeddings
    roberta_sent_encoder = roberta.model.decoder.sentence_encoder
    model.roberta.embeddings.word_embeddings.weight = roberta_sent_encoder.embed_tokens.weight
    model.roberta.embeddings.position_embeddings.weight = roberta_sent_encoder.embed_positions.weight
    model.roberta.embeddings.token_type_embeddings.weight.data = torch.zeros_like(
        model.roberta.embeddings.token_type_embeddings.weight)  # just zero them out b/c RoBERTa doesn't use them.
    model.roberta.embeddings.LayerNorm.weight = roberta_sent_encoder.emb_layer_norm.weight
    model.roberta.embeddings.LayerNorm.bias = roberta_sent_encoder.emb_layer_norm.bias
    model.roberta.embeddings.LayerNorm.variance_epsilon = roberta_sent_encoder.emb_layer_norm.eps

    for i in range(config.num_hidden_layers):
        # Encoder: start of layer
        layer: BertLayer = model.roberta.encoder.layer[i]
        roberta_layer: TransformerSentenceEncoderLayer = roberta_sent_encoder.layers[i]

        ### self attention
        self_attn: BertSelfAttention = layer.attention.self
        assert(
            roberta_layer.self_attn.in_proj_weight.shape == torch.Size((3 * config.hidden_size, config.hidden_size))
        )
        # we use three distinct linear layers so we split the source layer here.
        self_attn.query.weight.data = roberta_layer.self_attn.in_proj_weight[:config.hidden_size, :]
        self_attn.query.bias.data = roberta_layer.self_attn.in_proj_bias[:config.hidden_size]
        self_attn.key.weight.data = roberta_layer.self_attn.in_proj_weight[config.hidden_size:2*config.hidden_size, :]
        self_attn.key.bias.data = roberta_layer.self_attn.in_proj_bias[config.hidden_size:2*config.hidden_size]
        self_attn.value.weight.data = roberta_layer.self_attn.in_proj_weight[2*config.hidden_size:, :]
        self_attn.value.bias.data = roberta_layer.self_attn.in_proj_bias[2*config.hidden_size:]

        ### self-attention output
        self_output: BertSelfOutput = layer.attention.output
        assert(
            self_output.dense.weight.shape == roberta_layer.self_attn.out_proj.weight.shape
        )
        self_output.dense.weight = roberta_layer.self_attn.out_proj.weight
        self_output.dense.bias = roberta_layer.self_attn.out_proj.bias
        self_output.LayerNorm.weight = roberta_layer.self_attn_layer_norm.weight
        self_output.LayerNorm.bias = roberta_layer.self_attn_layer_norm.bias
        self_output.LayerNorm.variance_epsilon = roberta_layer.self_attn_layer_norm.eps

        ### intermediate
        intermediate: BertIntermediate = layer.intermediate
        assert(
            intermediate.dense.weight.shape == roberta_layer.fc1.weight.shape
        )
        intermediate.dense.weight = roberta_layer.fc1.weight
        intermediate.dense.bias = roberta_layer.fc1.bias

        ### output
        bert_output: BertOutput = layer.output
        assert(
            bert_output.dense.weight.shape == roberta_layer.fc2.weight.shape
        )
        bert_output.dense.weight = roberta_layer.fc2.weight
        bert_output.dense.bias = roberta_layer.fc2.bias
        bert_output.LayerNorm.weight = roberta_layer.final_layer_norm.weight
        bert_output.LayerNorm.bias = roberta_layer.final_layer_norm.bias
        bert_output.LayerNorm.variance_epsilon = roberta_layer.final_layer_norm.eps
        #### end of layer

    if classification_head:
        model.classifier.dense.weight = roberta.model.classification_heads['mnli'].dense.weight
        model.classifier.dense.bias = roberta.model.classification_heads['mnli'].dense.bias
        model.classifier.out_proj.weight = roberta.model.classification_heads['mnli'].out_proj.weight
        model.classifier.out_proj.bias = roberta.model.classification_heads['mnli'].out_proj.bias
    else:
        # LM Head
        model.lm_head.dense.weight = roberta.model.decoder.lm_head.dense.weight
        model.lm_head.dense.bias = roberta.model.decoder.lm_head.dense.bias
        model.lm_head.layer_norm.weight = roberta.model.decoder.lm_head.layer_norm.weight
        model.lm_head.layer_norm.bias = roberta.model.decoder.lm_head.layer_norm.bias
        model.lm_head.layer_norm.variance_epsilon = roberta.model.decoder.lm_head.layer_norm.eps
        model.lm_head.decoder.weight = roberta.model.decoder.lm_head.weight
        model.lm_head.bias = roberta.model.decoder.lm_head.bias

    # Let's check that we get the same results.
    input_ids: torch.Tensor = roberta.encode(SAMPLE_TEXT).unsqueeze(0)  # batch of size 1

    our_output = model(input_ids)[0]
    if classification_head:
        their_output = roberta.model.classification_heads['mnli'](roberta.extract_features(input_ids))
    else:
        their_output = roberta.model(input_ids)[0]
    print(our_output.shape, their_output.shape)
    success = torch.allclose(our_output, their_output, atol=1e-3)
    print("Do both models output the same tensors?", "🔥" if success else "💩")
    if not success:
        raise Exception("Something went wRoNg")

    print(f"Saving model to {pytorch_dump_folder_path}")
    model.save_pretrained(pytorch_dump_folder_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    ## Required parameters
    parser.add_argument("--roberta_checkpoint_path",
                        default=None,
                        type=str,
                        required=True,
                        help="Path the official PyTorch dump.")
    parser.add_argument("--pytorch_dump_folder_path",
                        default=None,
                        type=str,
                        required=True,
                        help="Path to the output PyTorch model.")
    parser.add_argument("--classification_head",
                        action="store_true",
                        help="Whether to convert a final classification head.")
    args = parser.parse_args()
    convert_roberta_checkpoint_to_pytorch(args.roberta_checkpoint_path,
                                          args.pytorch_dump_folder_path,
                                          args.classification_head)
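
The only non-trivial mapping in this converter is the attention block: fairseq stores query, key, and value as one fused `in_proj_weight` of shape (3 * hidden, hidden), which the loop slices into three row blocks. A toy illustration of that slicing (shapes shrunk; the tensors are random stand-ins, not real checkpoint weights):

import torch

hidden = 4                                        # stand-in for config.hidden_size
in_proj_weight = torch.randn(3 * hidden, hidden)  # fused [q; k; v] as in fairseq
in_proj_bias = torch.randn(3 * hidden)

q_w = in_proj_weight[:hidden, :]                  # rows 0..h-1    -> query
k_w = in_proj_weight[hidden:2 * hidden, :]        # rows h..2h-1   -> key
v_w = in_proj_weight[2 * hidden:, :]              # rows 2h..3h-1  -> value
assert torch.equal(torch.cat([q_w, k_w, v_w], dim=0), in_proj_weight)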
pytorch_transformers/convert_tf_checkpoint_to_pytorch.py (View file @ 0a2fecdf)
...
@@ -47,7 +47,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--bert_config_file",
                         default=None,
                         type=str,
...
pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py (View file @ 0a2fecdf)
...
@@ -24,11 +24,10 @@ from io import open
 import torch

 import pytorch_transformers.tokenization_transfo_xl as data_utils
-from pytorch_transformers.modeling_transfo_xl import (CONFIG_NAME, WEIGHTS_NAME,
-                                                      TransfoXLConfig,
-                                                      TransfoXLLMHeadModel,
-                                                      load_tf_weights_in_transfo_xl)
+from pytorch_transformers import CONFIG_NAME, WEIGHTS_NAME
+from pytorch_transformers.modeling_transfo_xl import (TransfoXLConfig,
+                                                      TransfoXLLMHeadModel,
+                                                      load_tf_weights_in_transfo_xl)
 from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME, VOCAB_FILES_NAMES)

 if sys.version_info[0] == 2:
...
@@ -76,7 +75,7 @@ def convert_transfo_xl_checkpoint_to_pytorch(tf_checkpoint_path,
     if transfo_xl_config_file == "":
         config = TransfoXLConfig()
     else:
-        config = TransfoXLConfig(transfo_xl_config_file)
+        config = TransfoXLConfig.from_json_file(transfo_xl_config_file)
     print("Building PyTorch model from configuration: {}".format(str(config)))
     model = TransfoXLLMHeadModel(config)
...
pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py (View file @ 0a2fecdf)
...
@@ -36,7 +36,7 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p
     model = chkpt['model']

     config = chkpt['params']
-    config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.Tensor, numpy.ndarray)))
+    config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))

     vocab = chkpt['dico_word2id']
     vocab = dict((s + '</w>' if s.find('@@') == -1 and i > 13 else s.replace('@@', ''), i) for s, i in vocab.items())
...
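
The changed line filters fairseq's `params` namespace down to plain Python values before it is turned into a config, dropping tensor and array entries. A toy version of the filter's mechanics (keys and values are made up, not from a real checkpoint):

import numpy
import torch

params = {'n_layers': 12, 'emb_dim': 2048, 'scores': torch.zeros(3), 'freqs': numpy.ones(3)}
config = dict((n, v) for n, v in params.items()
              if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))
assert config == {'n_layers': 12, 'emb_dim': 2048}
# torch.zeros(3) is a CPU FloatTensor and numpy.ones(3) is an ndarray, so both are dropped;
# note the narrower FloatTensor check now keeps non-float tensors (e.g. a LongTensor).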
pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py (View file @ 0a2fecdf)
...
@@ -79,7 +79,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--xlnet_config_file",
                         default=None,
                         type=str,
...
pytorch_transformers/file_utils.py (View file @ 0a2fecdf)
...
@@ -17,8 +17,9 @@ from hashlib import sha256
 from io import open

 import boto3
+import requests
+from botocore.config import Config
 from botocore.exceptions import ClientError
-import requests
 from tqdm import tqdm

 try:
...
@@ -38,10 +39,13 @@ except ImportError:
 try:
     from pathlib import Path
     PYTORCH_PRETRAINED_BERT_CACHE = Path(
-        os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', default_cache_path))
+        os.getenv('PYTORCH_TRANSFORMERS_CACHE',
+                  os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', default_cache_path)))
 except (AttributeError, ImportError):
-    PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
-                                              default_cache_path)
+    PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_TRANSFORMERS_CACHE',
+                                              os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
+                                                        default_cache_path))
+
+PYTORCH_TRANSFORMERS_CACHE = PYTORCH_PRETRAINED_BERT_CACHE  # Kept for backward compatibility

 logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
...
@@ -70,7 +74,7 @@ def filename_to_url(filename, cache_dir=None):
     Raise ``EnvironmentError`` if `filename` or its stored metadata do not exist.
     """
     if cache_dir is None:
-        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
+        cache_dir = PYTORCH_TRANSFORMERS_CACHE
     if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
...
@@ -90,15 +94,18 @@ def filename_to_url(filename, cache_dir=None):
     return url, etag


-def cached_path(url_or_filename, cache_dir=None):
+def cached_path(url_or_filename, cache_dir=None, force_download=False, proxies=None):
     """
     Given something that might be a URL (or might be a local path),
     determine which. If it's a URL, download the file and cache it, and
     return the path to the cached file. If it's already a local path,
     make sure the file exists and then return the path.
+    Args:
+        cache_dir: specify a cache directory to save the file to (overwrite the default cache dir).
+        force_download: if True, re-dowload the file even if it's already cached in the cache dir.
     """
     if cache_dir is None:
-        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
+        cache_dir = PYTORCH_TRANSFORMERS_CACHE
     if sys.version_info[0] == 3 and isinstance(url_or_filename, Path):
         url_or_filename = str(url_or_filename)
     if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
...
@@ -108,7 +115,7 @@ def cached_path(url_or_filename, cache_dir=None):
     if parsed.scheme in ('http', 'https', 's3'):
         # URL, so get it from the cache (downloading if necessary)
-        return get_from_cache(url_or_filename, cache_dir)
+        return get_from_cache(url_or_filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies)
     elif os.path.exists(url_or_filename):
         # File, and it exists.
         return url_or_filename
...
@@ -153,24 +160,24 @@ def s3_request(func):

 @s3_request
-def s3_etag(url):
+def s3_etag(url, proxies=None):
     """Check ETag on S3 object."""
-    s3_resource = boto3.resource("s3")
+    s3_resource = boto3.resource("s3", config=Config(proxies=proxies))
     bucket_name, s3_path = split_s3_path(url)
     s3_object = s3_resource.Object(bucket_name, s3_path)
     return s3_object.e_tag


 @s3_request
-def s3_get(url, temp_file):
+def s3_get(url, temp_file, proxies=None):
     """Pull a file directly from S3."""
-    s3_resource = boto3.resource("s3")
+    s3_resource = boto3.resource("s3", config=Config(proxies=proxies))
     bucket_name, s3_path = split_s3_path(url)
     s3_resource.Bucket(bucket_name).download_fileobj(s3_path, temp_file)


-def http_get(url, temp_file):
-    req = requests.get(url, stream=True)
+def http_get(url, temp_file, proxies=None):
+    req = requests.get(url, stream=True, proxies=proxies)
     content_length = req.headers.get('Content-Length')
     total = int(content_length) if content_length is not None else None
     progress = tqdm(unit="B", total=total)
...
@@ -181,13 +188,13 @@ def http_get(url, temp_file):
     progress.close()


-def get_from_cache(url, cache_dir=None):
+def get_from_cache(url, cache_dir=None, force_download=False, proxies=None):
     """
     Given a URL, look for the corresponding dataset in the local cache.
    If it's not there, download it. Then return the path to the cached file.
     """
     if cache_dir is None:
-        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
+        cache_dir = PYTORCH_TRANSFORMERS_CACHE
     if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
     if sys.version_info[0] == 2 and not isinstance(cache_dir, str):
...
@@ -198,10 +205,10 @@ def get_from_cache(url, cache_dir=None):
     # Get eTag to add to filename, if it exists.
     if url.startswith("s3://"):
-        etag = s3_etag(url)
+        etag = s3_etag(url, proxies=proxies)
     else:
         try:
-            response = requests.head(url, allow_redirects=True)
+            response = requests.head(url, allow_redirects=True, proxies=proxies)
             if response.status_code != 200:
                 etag = None
             else:
...
@@ -224,17 +231,17 @@ def get_from_cache(url, cache_dir=None):
     if matching_files:
         cache_path = os.path.join(cache_dir, matching_files[-1])

-    if not os.path.exists(cache_path):
+    if not os.path.exists(cache_path) or force_download:
         # Download to temporary file, then copy to cache dir once finished.
         # Otherwise you get corrupt cache entries if the download gets interrupted.
         with tempfile.NamedTemporaryFile() as temp_file:
-            logger.info("%s not found in cache, downloading to %s", url, temp_file.name)
+            logger.info("%s not found in cache or force_download set to True, downloading to %s", url, temp_file.name)

             # GET file object
             if url.startswith("s3://"):
-                s3_get(url, temp_file)
+                s3_get(url, temp_file, proxies=proxies)
             else:
-                http_get(url, temp_file)
+                http_get(url, temp_file, proxies=proxies)

             # we are copying the file before closing it, so flush to avoid truncation
             temp_file.flush()
...
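
Threaded through all of these hunks, `proxies` is forwarded to `requests` (HTTP) and, via `botocore.config.Config`, to `boto3` (S3), while `force_download` bypasses the cache hit. A hedged usage sketch of the new knobs (the URL and proxy address are illustrative, not real endpoints):

from pytorch_transformers.file_utils import cached_path

proxies = {'https': 'https://user:pass@proxy.example.com:3128'}  # hypothetical proxy
path = cached_path('https://example.com/weights.bin',            # hypothetical URL
                   force_download=True,   # re-download even if a cached copy exists
                   proxies=proxies)       # forwarded to requests / boto3
print(path)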
pytorch_transformers/modeling_auto.py (new file, 0 → 100644) (View file @ 0a2fecdf)

# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Auto Model class. """

from __future__ import absolute_import, division, print_function, unicode_literals

import logging

import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from torch.nn.parameter import Parameter

from .modeling_bert import BertConfig, BertModel
from .modeling_openai import OpenAIGPTConfig, OpenAIGPTModel
from .modeling_gpt2 import GPT2Config, GPT2Model
from .modeling_transfo_xl import TransfoXLConfig, TransfoXLModel
from .modeling_xlnet import XLNetConfig, XLNetModel
from .modeling_xlm import XLMConfig, XLMModel
from .modeling_roberta import RobertaConfig, RobertaModel
from .modeling_distilbert import DistilBertConfig, DistilBertModel

from .modeling_utils import PreTrainedModel, SequenceSummary

logger = logging.getLogger(__name__)


class AutoConfig(object):
    r""":class:`~pytorch_transformers.AutoConfig` is a generic configuration class
        that will be instantiated as one of the configuration classes of the library
        when created with the `AutoConfig.from_pretrained(pretrained_model_name_or_path)`
        class method.

        The `from_pretrained()` method takes care of returning the correct model class instance
        using pattern matching on the `pretrained_model_name_or_path` string.

        The base model class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `bert`: BertConfig (Bert model)
            - contains `openai-gpt`: OpenAIGPTConfig (OpenAI GPT model)
            - contains `gpt2`: GPT2Config (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLConfig (Transformer-XL model)
            - contains `xlnet`: XLNetConfig (XLNet model)
            - contains `xlm`: XLMConfig (XLM model)
            - contains `roberta`: RobertaConfig (RoBERTa model)

        This class cannot be instantiated using `__init__()` (it throws an error).
    """
    def __init__(self):
        raise EnvironmentError("AutoConfig is designed to be instantiated "
            "using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.")

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r""" Instantiate one of the configuration classes of the library
        from a pre-trained model configuration.

        The configuration class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `bert`: BertConfig (Bert model)
            - contains `openai-gpt`: OpenAIGPTConfig (OpenAI GPT model)
            - contains `gpt2`: GPT2Config (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLConfig (Transformer-XL model)
            - contains `xlnet`: XLNetConfig (XLNet model)
            - contains `xlm`: XLMConfig (XLM model)
            - contains `roberta`: RobertaConfig (RoBERTa model)

        Params:
            **pretrained_model_name_or_path**: either:
                - a string with the `shortcut name` of a pre-trained model configuration to load from cache
                  or download and cache if not already stored in cache (e.g. 'bert-base-uncased').
                - a path to a `directory` containing a configuration file saved
                  using the `save_pretrained(save_directory)` method.
                - a path or url to a saved configuration `file`.
            **cache_dir**: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            **return_unused_kwargs**: (`optional`) bool:
                - If False, then this function returns just the final configuration object.
                - If True, then this function returns a tuple `(config, unused_kwargs)` where `unused_kwargs`
                  is a dictionary consisting of the key/value pairs whose keys are not configuration attributes:
                  i.e. the part of kwargs which has not been used to update `config` and is otherwise ignored.
            **kwargs**: (`optional`) dict:
                Dictionary of key/value pairs with which to update the configuration object after loading.
                - The values in kwargs of any keys which are configuration attributes will be used
                  to override the loaded values.
                - Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                  by the `return_unused_kwargs` keyword parameter.

        Examples::

            config = AutoConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
            config = AutoConfig.from_pretrained('./test/bert_saved_model/')  # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
            config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json')
            config = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True, foo=False)
            assert config.output_attention == True
            config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True,
                                                               foo=False, return_unused_kwargs=True)
            assert config.output_attention == True
            assert unused_kwargs == {'foo': False}
        """
        if 'distilbert' in pretrained_model_name_or_path:
            return DistilBertConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'roberta' in pretrained_model_name_or_path:
            return RobertaConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'bert' in pretrained_model_name_or_path:
            return BertConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'openai-gpt' in pretrained_model_name_or_path:
            return OpenAIGPTConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'gpt2' in pretrained_model_name_or_path:
            return GPT2Config.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'transfo-xl' in pretrained_model_name_or_path:
            return TransfoXLConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'xlnet' in pretrained_model_name_or_path:
            return XLNetConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'xlm' in pretrained_model_name_or_path:
            return XLMConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)

        raise ValueError("Unrecognized model identifier in {}. Should contain one of "
                         "'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
                         "'xlm', 'roberta'".format(pretrained_model_name_or_path))


class AutoModel(object):
    r"""
        :class:`~pytorch_transformers.AutoModel` is a generic model class
        that will be instantiated as one of the base model classes of the library
        when created with the `AutoModel.from_pretrained(pretrained_model_name_or_path)`
        class method.

        The `from_pretrained()` method takes care of returning the correct model class instance
        using pattern matching on the `pretrained_model_name_or_path` string.

        The base model class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `bert`: BertModel (Bert model)
            - contains `openai-gpt`: OpenAIGPTModel (OpenAI GPT model)
            - contains `gpt2`: GPT2Model (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLModel (Transformer-XL model)
            - contains `xlnet`: XLNetModel (XLNet model)
            - contains `xlm`: XLMModel (XLM model)
            - contains `roberta`: RobertaModel (RoBERTa model)

        This class cannot be instantiated using `__init__()` (it throws an error).
    """
    def __init__(self):
        raise EnvironmentError("AutoModel is designed to be instantiated "
            "using the `AutoModel.from_pretrained(pretrained_model_name_or_path)` method.")

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r""" Instantiate one of the base model classes of the library
        from a pre-trained model configuration.

        The base model class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `bert`: BertModel (Bert model)
            - contains `openai-gpt`: OpenAIGPTModel (OpenAI GPT model)
            - contains `gpt2`: GPT2Model (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLModel (Transformer-XL model)
            - contains `xlnet`: XLNetModel (XLNet model)
            - contains `xlm`: XLMModel (XLM model)
            - contains `roberta`: RobertaModel (RoBERTa model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Params:
            **pretrained_model_name_or_path**: either:
                - a string with the `shortcut name` of a pre-trained model to load from cache
                  or download and cache if not already stored in cache (e.g. 'bert-base-uncased').
                - a path to a `directory` containing a configuration file saved
                  using the `save_pretrained(save_directory)` method.
                - a path or url to a tensorflow index checkpoint `file` (e.g. `./tf_model/model.ckpt.index`).
                  In this case, ``from_tf`` should be set to True and a configuration object should be
                  provided as `config` argument. This loading option is slower than converting the TensorFlow
                  checkpoint into a PyTorch model using the provided conversion scripts and loading
                  the PyTorch model afterwards.
            **model_args**: (`optional`) Sequence:
                All remaining positional arguments will be passed to the underlying model's __init__ function.
            **config**: an optional configuration for the model to use instead of an automatically loaded configuration.
                Configuration can be automatically loaded when:
                - the model is a model provided by the library (loaded with a `shortcut name` of a pre-trained model), or
                - the model was saved using the `save_pretrained(save_directory)` (loaded by supplying the save directory).
            **state_dict**: an optional state dictionary for the model to use instead of a state dictionary loaded
                from the saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using `save_pretrained(dir)` and `from_pretrained(save_directory)` is not
                a simpler option.
            **cache_dir**: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            **output_loading_info**: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
            **kwargs**: (`optional`) dict:
                Dictionary of key, values to update the configuration object after loading.
                Can be used to override selected configuration parameters. E.g. ``output_attention=True``.
                - If a configuration is provided with `config`, **kwargs will be directly passed
                  to the underlying model's __init__ method.
                - If a configuration is not provided, **kwargs will be first passed to the pretrained
                  model configuration class loading function (`PretrainedConfig.from_pretrained`).
                  Each key of **kwargs that corresponds to a configuration attribute
                  will be used to override said attribute with the supplied **kwargs value.
                  Remaining keys that do not correspond to any configuration attribute will
                  be passed to the underlying model's __init__ function.

        Examples::

            model = AutoModel.from_pretrained('bert-base-uncased')  # Download model and configuration from S3 and cache.
            model = AutoModel.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
            model = AutoModel.from_pretrained('bert-base-uncased', output_attention=True)  # Update configuration during loading
            assert model.config.output_attention == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
        """
        if 'distilbert' in pretrained_model_name_or_path:
            return DistilBertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'roberta' in pretrained_model_name_or_path:
            return RobertaModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'bert' in pretrained_model_name_or_path:
            return BertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'openai-gpt' in pretrained_model_name_or_path:
            return OpenAIGPTModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'gpt2' in pretrained_model_name_or_path:
            return GPT2Model.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'transfo-xl' in pretrained_model_name_or_path:
            return TransfoXLModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'xlnet' in pretrained_model_name_or_path:
            return XLNetModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'xlm' in pretrained_model_name_or_path:
            return XLMModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)

        raise ValueError("Unrecognized model identifier in {}. Should contain one of "
                         "'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
                         "'xlm', 'roberta'".format(pretrained_model_name_or_path))
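
One detail worth noting in both `from_pretrained` bodies: the branches run in a fixed order, and 'distilbert' is tested before 'bert' because the substring 'bert' occurs inside 'distilbert'; reversing the branches would route DistilBERT names to the BERT classes. A quick illustration of the substring matching (names only, no downloads):

name = 'distilbert-base-uncased'
assert 'bert' in name        # would also match the BERT branch...
assert 'distilbert' in name  # ...so the DistilBERT test must come first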