Unverified commit 0a2fecdf, authored Aug 30, 2019 by Thomas Wolf; committed by GitHub, Aug 30, 2019

Merge branch 'master' into master

Parents: 39eb31e1, e0caab0c
Changes: 98. Showing 20 changed files with 841 additions and 382 deletions (+841, -382).
examples/single_model_scripts/run_openai_gpt.py                     +1    -1
examples/test_examples.py                                           +1    -1
examples/utils_glue.py                                              +20   -9
hubconfs/bert_hubconf.py                                            +104  -104
hubconfs/gpt2_hubconf.py                                            +42   -42
hubconfs/gpt_hubconf.py                                             +39   -39
hubconfs/transformer_xl_hubconf.py                                  +35   -35
hubconfs/xlm_hubconf.py                                             +40   -40
hubconfs/xlnet_hubconf.1.py                                         +42   -42
pytorch_transformers/__init__.py                                    +24   -13
pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py          +2    -2
pytorch_transformers/convert_openai_checkpoint_to_pytorch.py        +2    -2
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py            +21   -21
pytorch_transformers/convert_roberta_checkpoint_to_pytorch.py       +181  -0
pytorch_transformers/convert_tf_checkpoint_to_pytorch.py            +1    -1
pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py    +5    -6
pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py           +1    -1
pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py         +1    -1
pytorch_transformers/file_utils.py                                  +29   -22
pytorch_transformers/modeling_auto.py                               +250  -0
examples/single_model_scripts/run_openai_gpt.py  (view file @ 0a2fecdf)

@@ -205,7 +205,7 @@ def main():
     param_optimizer = list(model.named_parameters())
     no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
     optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
     ]
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
...
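This hunk stops hard-coding the decayed group's weight decay at 0.01 and takes it from the command line instead. A minimal sketch of the resulting wiring, using a stand-in linear model and hypothetical flag defaults (the real script defines its own argparse options and imports AdamW from pytorch_transformers):

import argparse
import torch
from torch.optim import AdamW  # stand-in; the script uses pytorch_transformers' AdamW

parser = argparse.ArgumentParser()
parser.add_argument("--weight_decay", type=float, default=0.01)     # hypothetical default
parser.add_argument("--learning_rate", type=float, default=6.25e-5)
parser.add_argument("--adam_epsilon", type=float, default=1e-8)
args = parser.parse_args([])  # empty argv: keep the defaults for this sketch

model = torch.nn.Linear(4, 2)  # stand-in for the GPT model
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    # decay everything except biases and LayerNorm parameters
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
     'weight_decay': args.weight_decay},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)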
examples/test_examples.py  (view file @ 0a2fecdf)

@@ -81,7 +81,7 @@ class ExamplesTests(unittest.TestCase):
            "--do_train",
            "--do_eval",
            "--version_2_with_negative",
-           "--learning_rate=1e-4",
+           "--learning_rate=2e-4",
            "--per_gpu_train_batch_size=2",
            "--per_gpu_eval_batch_size=1",
            "--overwrite_output_dir",
...
examples/utils_glue.py  (view file @ 0a2fecdf)

@@ -390,10 +390,16 @@ class WnliProcessor(DataProcessor):
 def convert_examples_to_features(examples, label_list, max_seq_length,
                                  tokenizer, output_mode,
-                                 cls_token_at_end=False, pad_on_left=False,
-                                 cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
-                                 sequence_a_segment_id=0, sequence_b_segment_id=1,
-                                 cls_token_segment_id=1, pad_token_segment_id=0,
+                                 cls_token_at_end=False,
+                                 cls_token='[CLS]',
+                                 cls_token_segment_id=1,
+                                 sep_token='[SEP]',
+                                 sep_token_extra=False,
+                                 pad_on_left=False,
+                                 pad_token=0,
+                                 pad_token_segment_id=0,
+                                 sequence_a_segment_id=0,
+                                 sequence_b_segment_id=1,
                                  mask_padding_with_zero=True):
     """ Loads a data file into a list of `InputBatch`s
         `cls_token_at_end` define the location of the CLS token:
...
@@ -416,12 +422,14 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
         tokens_b = tokenizer.tokenize(example.text_b)
         # Modifies `tokens_a` and `tokens_b` in place so that the total
         # length is less than the specified length.
-        # Account for [CLS], [SEP], [SEP] with "- 3"
-        _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
+        # Account for [CLS], [SEP], [SEP] with "- 3". " -4" for RoBERTa.
+        special_tokens_count = 4 if sep_token_extra else 3
+        _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
     else:
-        # Account for [CLS] and [SEP] with "- 2"
-        if len(tokens_a) > max_seq_length - 2:
-            tokens_a = tokens_a[:(max_seq_length - 2)]
+        # Account for [CLS] and [SEP] with "- 2" and with "- 3" for RoBERTa.
+        special_tokens_count = 3 if sep_token_extra else 2
+        if len(tokens_a) > max_seq_length - special_tokens_count:
+            tokens_a = tokens_a[:(max_seq_length - special_tokens_count)]
     # The convention in BERT is:
     # (a) For sequence pairs:
...
@@ -442,6 +450,9 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
     # used as as the "sentence vector". Note that this only makes sense because
     # the entire model is fine-tuned.
     tokens = tokens_a + [sep_token]
+    if sep_token_extra:
+        # roberta uses an extra separator b/w pairs of sentences
+        tokens += [sep_token]
     segment_ids = [sequence_a_segment_id] * len(tokens)
     if tokens_b:
...
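The new sep_token_extra flag exists because RoBERTa places two separators between the segments of a pair (<s> A </s></s> B </s>), so one more position must be reserved than in BERT's [CLS] A [SEP] B [SEP]. A small self-contained sketch of the budget arithmetic implied by the hunks above (illustrative only; names mirror the diff, values follow its comments):

def special_tokens_budget(is_pair, sep_token_extra):
    # Pair:   [CLS] A [SEP] B [SEP]  -> 3 reserved positions (BERT-style)
    #         <s> A </s></s> B </s>  -> 4 (RoBERTa-style, sep_token_extra=True)
    # Single: [CLS] A [SEP]          -> 2 (BERT-style); 3 for RoBERTa per the diff
    if is_pair:
        return 4 if sep_token_extra else 3
    return 3 if sep_token_extra else 2

max_seq_length = 128
print(max_seq_length - special_tokens_budget(True, False))  # 125 text tokens for a BERT pair
print(max_seq_length - special_tokens_budget(True, True))   # 124 for a RoBERTa pair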
hubconfs/bert_hubconf.py  (view file @ 0a2fecdf)

@@ -37,7 +37,7 @@ bert_docstring = """
             checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models
             will be cached.
-        state_dict: an optional state dictionnary
+        state_dict: an optional state dictionary
             (collections.OrderedDict object) to use instead of Google
             pre-trained models
         *inputs, **kwargs: additional input for the specific Bert class
...
@@ -84,12 +84,12 @@ def bertTokenizer(*args, **kwargs):
                 Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
     Example:
-        >>> import torch
-        >>> sentence = 'Hello, World!'
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
-        >>> toks = tokenizer.tokenize(sentence)
+        import torch
+        sentence = 'Hello, World!'
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        toks = tokenizer.tokenize(sentence)
         ['Hello', '##,', 'World', '##!']
-        >>> ids = tokenizer.convert_tokens_to_ids(toks)
+        ids = tokenizer.convert_tokens_to_ids(toks)
         [8667, 28136, 1291, 28125]
     """
     tokenizer = BertTokenizer.from_pretrained(*args, **kwargs)
...
@@ -105,20 +105,20 @@ def bertModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 encoded_layers, _ = model(tokens_tensor, segments_tensors)
     """
     model = BertModel.from_pretrained(*args, **kwargs)
...
@@ -134,20 +134,20 @@ def bertForNextSentencePrediction(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForNextSentencePrediction
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
+        model.eval()
         # Predict the next sentence classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 next_sent_classif_logits = model(tokens_tensor, segments_tensors)
     """
     model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs)
...
@@ -164,17 +164,17 @@ def bertForPreTraining(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForPreTraining
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
-        >>> masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
+        masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
     """
     model = BertForPreTraining.from_pretrained(*args, **kwargs)
     return model
...
@@ -188,25 +188,25 @@ def bertForMaskedLM(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> masked_index = 8
-        >>> tokenized_text[masked_index] = '[MASK]'
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        masked_index = 8
+        tokenized_text[masked_index] = '[MASK]'
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForMaskedLM
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
+        model.eval()
         # Predict all tokens
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions = model(tokens_tensor, segments_tensors)
-        >>> predicted_index = torch.argmax(predictions[0, masked_index]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        predicted_index = torch.argmax(predictions[0, masked_index]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
         'henson'
     """
     model = BertForMaskedLM.from_pretrained(*args, **kwargs)
...
@@ -230,24 +230,24 @@ def bertForSequenceClassification(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForSequenceClassification
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
+        model.eval()
         # Predict the sequence classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 seq_classif_logits = model(tokens_tensor, segments_tensors)
         # Or get the sequence classification loss
-        >>> labels = torch.tensor([1])
-        >>> seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
+        labels = torch.tensor([1])
+        seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
     """
     model = BertForSequenceClassification.from_pretrained(*args, **kwargs)
     return model
...
@@ -265,24 +265,24 @@ def bertForMultipleChoice(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
-        >>> segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
+        segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
         # Load bertForMultipleChoice
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
+        model.eval()
         # Predict the multiple choice logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 multiple_choice_logits = model(tokens_tensor, segments_tensors)
         # Or get the multiple choice loss
-        >>> labels = torch.tensor([1])
-        >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
+        labels = torch.tensor([1])
+        multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
     """
     model = BertForMultipleChoice.from_pretrained(*args, **kwargs)
     return model
...
@@ -298,25 +298,25 @@ def bertForQuestionAnswering(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForQuestionAnswering
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
+        model.eval()
         # Predict the start and end positions logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 start_logits, end_logits = model(tokens_tensor, segments_tensors)
         # Or get the total loss which is the sum of the CrossEntropy loss for the start and end token positions
-        >>> start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
+        start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
         # set model.train() before if training this loss
-        >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
+        multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
     """
    model = BertForQuestionAnswering.from_pretrained(*args, **kwargs)
    return model
...
@@ -337,24 +337,24 @@ def bertForTokenClassification(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForTokenClassification
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
+        model.eval()
         # Predict the token classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 classif_logits = model(tokens_tensor, segments_tensors)
         # Or get the token classification loss
-        >>> labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
-        >>> classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
+        labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
+        classif_loss = model(tokens_tensor, segments_tensors, labels=labels)  # set model.train() before if training this loss
     """
    model = BertForTokenClassification.from_pretrained(*args, **kwargs)
    return model
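Every entry point in this file follows the same thin-wrapper pattern: the docstring carries the torch.hub usage example (which is why the ">>> " prompts above are worth removing for display), and the body simply forwards to from_pretrained. A minimal sketch of that pattern, assuming pytorch_transformers is installed:

from pytorch_transformers import BertForMaskedLM

def bertForMaskedLM(*args, **kwargs):
    """BertForMaskedLM: BERT with the pre-trained masked-language-modeling head.

    Example:
        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
    """
    # torch.hub.load() resolves the entry point by name in hubconf.py and
    # passes the remaining arguments straight through to from_pretrained().
    model = BertForMaskedLM.from_pretrained(*args, **kwargs)
    return model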
hubconfs/gpt2_hubconf.py  (view file @ 0a2fecdf)

@@ -52,11 +52,11 @@ def gpt2Tokenizer(*args, **kwargs):
                 Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
     return tokenizer
...
@@ -71,24 +71,24 @@ def gpt2Model(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load gpt2Model
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
         # past can be used to reuse precomputed hidden state in a subsequent predictions
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, past = model(tokens_tensor_1)
                 hidden_states_2, past = model(tokens_tensor_2, past=past)
     """
...
@@ -104,31 +104,31 @@ def gpt2LMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load gpt2LMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
         # past can be used to reuse precomputed hidden state in a subsequent predictions
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, past = model(tokens_tensor_1)
                 predictions_2, past = model(tokens_tensor_2, past=past)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
     return model
...
@@ -143,25 +143,25 @@ def gpt2DoubleHeadsModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-        >>> tokenized_text1 = tokenizer.tokenize(text1)
-        >>> tokenized_text2 = tokenizer.tokenize(text2)
-        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+        tokenized_text1 = tokenizer.tokenize(text1)
+        tokenized_text2 = tokenizer.tokenize(text2)
+        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
         # Load gpt2DoubleHeadsModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids)
     """
    model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs)
...
hubconfs/gpt_hubconf.py  (view file @ 0a2fecdf)

@@ -40,7 +40,7 @@ gpt_docstring = """
             . a series of NumPy files containing OpenAI TensorFlow trained weights
         from_tf: should we load the weights from a locally saved TensorFlow checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-        state_dict: an optional state dictionnary (collections.OrderedDict object)
+        state_dict: an optional state dictionary (collections.OrderedDict object)
             to use instead of pre-trained models
         *inputs, **kwargs: additional input for the specific OpenAI-GPT class
 """
...
@@ -76,12 +76,12 @@ def openAIGPTTokenizer(*args, **kwargs):
                 Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
         [763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483]
     """
     tokenizer = OpenAIGPTTokenizer.from_pretrained(*args, **kwargs)
...
@@ -97,21 +97,21 @@ def openAIGPTModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        tokens_tensor = torch.tensor([indexed_tokens])
         # Load openAIGPTModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states = model(tokens_tensor)
     """
     model = OpenAIGPTModel.from_pretrained(*args, **kwargs)
...
@@ -126,26 +126,26 @@ def openAIGPTLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        tokens_tensor = torch.tensor([indexed_tokens])
         # Load openAIGPTLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions = model(tokens_tensor)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions[0, -1, :]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        predicted_index = torch.argmax(predictions[0, -1, :]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
         '.</w>'
     """
     model = OpenAIGPTLMHeadModel.from_pretrained(*args, **kwargs)
...
@@ -161,25 +161,25 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-        >>> tokenized_text1 = tokenizer.tokenize(text1)
-        >>> tokenized_text2 = tokenizer.tokenize(text2)
-        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+        tokenized_text1 = tokenizer.tokenize(text1)
+        tokenized_text2 = tokenizer.tokenize(text2)
+        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
         # Load openAIGPTDoubleHeadsModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids)
     """
    model = OpenAIGPTDoubleHeadsModel.from_pretrained(*args, **kwargs)
...
hubconfs/transformer_xl_hubconf.py  (view file @ 0a2fecdf)

@@ -23,7 +23,7 @@ transformer_xl_docstring = """
             . `model.chkpt` a TensorFlow checkpoint
         from_tf: should we load the weights from a locally saved TensorFlow checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-        state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
+        state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
         *inputs, **kwargs: additional input for the specific TransformerXL class
 """
...
@@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs):
             * transfo-xl-wt103
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
-        >>> text = "Who was Jim Henson ?"
-        >>> tokenized_text = tokenizer.tokenize(tokenized_text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        text = "Who was Jim Henson ?"
+        tokenized_text = tokenizer.tokenize(tokenized_text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
     """
     tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer
...
@@ -63,26 +63,26 @@ def transformerXLModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
-        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
-        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
-        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        tokenized_text_1 = tokenizer.tokenize(text_1)
+        tokenized_text_2 = tokenizer.tokenize(text_2)
+        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load transformerXLModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
+        model.eval()
         # Predict hidden states features for each layer
         # We can re-use the memory cells in a subsequent call to attend a longer context
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems_1 = model(tokens_tensor_1)
                 hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
     """
...
@@ -98,33 +98,33 @@ def transformerXLLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
-        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
-        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
-        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        tokenized_text_1 = tokenizer.tokenize(text_1)
+        tokenized_text_2 = tokenizer.tokenize(text_2)
+        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load transformerXLLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
+        model.eval()
         # Predict hidden states features for each layer
         # We can re-use the memory cells in a subsequent call to attend a longer context
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems_1 = model(tokens_tensor_1)
                 predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
-        >>> assert predicted_token == 'who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        assert predicted_token == 'who'
     """
    model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
    return model
hubconfs/xlm_hubconf.py  (view file @ 0a2fecdf)

@@ -17,16 +17,16 @@ xlm_start_docstring = """
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 """
 # A lot of models share the same param doc. Use a decorator
...
@@ -76,11 +76,11 @@ def xlmTokenizer(*args, **kwargs):
                 Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = XLMTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer
...
@@ -91,11 +91,11 @@ def xlmTokenizer(*args, **kwargs):
 def xlmModel(*args, **kwargs):
     """
         # Load xlmModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems = model(tokens_tensor_1)
                 hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """
...
@@ -108,26 +108,26 @@ def xlmModel(*args, **kwargs):
 def xlmLMHeadModel(*args, **kwargs):
     """
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems = model(tokens_tensor_1)
                 predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = XLMWithLMHeadModel.from_pretrained(*args, **kwargs)
     return model
...
@@ -142,25 +142,25 @@ def xlmLMHeadModel(*args, **kwargs):
 #    Example:
 #        # Load the tokenizer
-#        >>> import torch
-#        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
+#        import torch
+#        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
 #        # Prepare tokenized input
-#        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-#        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-#        >>> tokenized_text1 = tokenizer.tokenize(text1)
-#        >>> tokenized_text2 = tokenizer.tokenize(text2)
-#        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-#        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-#        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-#        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+#        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+#        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+#        tokenized_text1 = tokenizer.tokenize(text1)
+#        tokenized_text2 = tokenizer.tokenize(text2)
+#        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+#        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+#        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+#        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 #        # Load xlnetForSequenceClassification
-#        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
-#        >>> model.eval()
+#        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
+#        model.eval()
 #        # Predict sequence classes logits
-#        >>> with torch.no_grad():
+#        with torch.no_grad():
 #                lm_logits, mems = model(tokens_tensor)
 # """
 # model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
...
hubconfs/xlnet_hubconf.1.py  (view file @ 0a2fecdf)

@@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs):
                 Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = XLNetTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer
...
@@ -72,23 +72,23 @@ def xlnetModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems = model(tokens_tensor_1)
                 hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """
...
@@ -106,30 +106,30 @@ def xlnetLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems = model(tokens_tensor_1)
                 predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = XLNetLMHeadModel.from_pretrained(*args, **kwargs)
     return model
...
@@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs):
 #    Example:
 #        # Load the tokenizer
-#        >>> import torch
-#        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+#        import torch
+#        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
 #        # Prepare tokenized input
-#        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-#        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-#        >>> tokenized_text1 = tokenizer.tokenize(text1)
-#        >>> tokenized_text2 = tokenizer.tokenize(text2)
-#        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-#        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-#        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-#        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+#        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+#        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+#        tokenized_text1 = tokenizer.tokenize(text1)
+#        tokenized_text2 = tokenizer.tokenize(text2)
+#        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+#        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+#        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+#        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 #        # Load xlnetForSequenceClassification
-#        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
-#        >>> model.eval()
+#        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
+#        model.eval()
 #        # Predict sequence classes logits
-#        >>> with torch.no_grad():
+#        with torch.no_grad():
 #                lm_logits, mems = model(tokens_tensor)
 # """
 # model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
...
pytorch_transformers/__init__.py  (view file @ 0a2fecdf)

-__version__ = "1.0.0"
+__version__ = "1.1.0"
+from .tokenization_auto import AutoTokenizer
 from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
 from .tokenization_openai import OpenAIGPTTokenizer
 from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
 from .tokenization_gpt2 import GPT2Tokenizer
 from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
 from .tokenization_xlm import XLMTokenizer
-from .tokenization_utils import (PreTrainedTokenizer, clean_up_tokenization)
 from .tokenization_roberta import RobertaTokenizer
+from .tokenization_distilbert import DistilBertTokenizer
+from .tokenization_utils import (PreTrainedTokenizer)

-from .modeling_bert import (BertConfig, BertModel, BertForPreTraining,
-                            BertForMaskedLM, BertForNextSentencePrediction,
-                            BertForSequenceClassification, BertForMultipleChoice,
-                            BertForTokenClassification, BertForQuestionAnswering,
-                            load_tf_weights_in_bert,
-                            BERT_PRETRAINED_MODEL_ARCHIVE_MAP, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)
-from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTModel,
+from .modeling_auto import (AutoConfig, AutoModel)
+from .modeling_bert import (BertConfig, BertPreTrainedModel, BertModel, BertForPreTraining,
+                            BertForMaskedLM, BertForNextSentencePrediction,
+                            BertForSequenceClassification, BertForMultipleChoice,
+                            BertForTokenClassification, BertForQuestionAnswering,
+                            load_tf_weights_in_bert,
+                            BERT_PRETRAINED_MODEL_ARCHIVE_MAP, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)
+from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTPreTrainedModel, OpenAIGPTModel,
                               OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel,
                               load_tf_weights_in_openai_gpt,
                               OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel,
+from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLPreTrainedModel, TransfoXLModel, TransfoXLLMHeadModel,
                                   load_tf_weights_in_transfo_xl,
                                   TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_gpt2 import (GPT2Config, GPT2Model,
+from .modeling_gpt2 import (GPT2Config, GPT2PreTrainedModel, GPT2Model,
                             GPT2LMHeadModel, GPT2DoubleHeadsModel,
                             load_tf_weights_in_gpt2,
                             GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
...
@@ -29,14 +35,19 @@ from .modeling_xlnet import (XLNetConfig,
                             XLNetForSequenceClassification, XLNetForQuestionAnswering,
                             load_tf_weights_in_xlnet,
                             XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_xlm import (XLMConfig, XLMModel,
+from .modeling_xlm import (XLMConfig, XLMPreTrainedModel, XLMModel,
                            XLMWithLMHeadModel, XLMForSequenceClassification,
                            XLMForQuestionAnswering,
                            XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
 from .modeling_roberta import (RobertaConfig, RobertaForMaskedLM, RobertaModel, RobertaForSequenceClassification,
                                ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
+from .modeling_distilbert import (DistilBertConfig, DistilBertForMaskedLM, DistilBertModel,
+                                  DistilBertForSequenceClassification, DistilBertForQuestionAnswering,
+                                  DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
 from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
                              PretrainedConfig, PreTrainedModel, prune_layer, Conv1D)
 from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule, WarmupCosineSchedule,
                            WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
-from .file_utils import (PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
+from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
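Besides the version bump to 1.1.0 and the newly re-exported *PreTrainedModel base classes, the notable addition is the Auto family (AutoConfig, AutoModel, AutoTokenizer), which selects the concrete class from the checkpoint name. A usage sketch (assuming pytorch_transformers 1.1.0 is installed; weights download on first use):

from pytorch_transformers import AutoTokenizer, AutoModel

# The Auto* classes inspect the name/path and dispatch to the matching
# concrete class (e.g. BertModel for 'bert-...', GPT2Model for 'gpt2').
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
model = AutoModel.from_pretrained('bert-base-cased')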
pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py  (view file @ 0a2fecdf)

@@ -35,7 +35,7 @@ def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, p
     if gpt2_config_file == "":
         config = GPT2Config()
     else:
-        config = GPT2Config(gpt2_config_file)
+        config = GPT2Config.from_json_file(gpt2_config_file)
     model = GPT2Model(config)

     # Load weights from numpy
...
@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default=None,
                         type=str,
...
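The from_json_file change is more than a rename: in this codebase the config constructors' first positional parameter historically did double duty (vocab_size_or_config_json_file), so passing a path positionally relied on that ambiguity, while the classmethod parses the JSON explicitly. A self-contained sketch (assuming pytorch_transformers is installed; the config values are illustrative):

import json
import tempfile

from pytorch_transformers import GPT2Config

# Write a tiny config to a temporary file so the sketch runs on its own.
with tempfile.NamedTemporaryFile('w', suffix='.json', delete=False) as f:
    json.dump({"vocab_size": 50257, "n_positions": 1024, "n_ctx": 1024,
               "n_embd": 768, "n_layer": 12, "n_head": 12}, f)
    config_path = f.name

config = GPT2Config.from_json_file(config_path)  # explicit, unambiguous JSON parsing
print(config.n_embd)  # 768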
pytorch_transformers/convert_openai_checkpoint_to_pytorch.py  (view file @ 0a2fecdf)

@@ -35,7 +35,7 @@ def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_c
     if openai_config_file == "":
         config = OpenAIGPTConfig()
     else:
-        config = OpenAIGPTConfig(openai_config_file)
+        config = OpenAIGPTConfig.from_json_file(openai_config_file)
     model = OpenAIGPTModel(config)

     # Load weights from numpy
...
@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default=None,
                         type=str,
...
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py  (view file @ 0a2fecdf)

(diff collapsed)
pytorch_transformers/convert_roberta_checkpoint_to_pytorch.py  (new file, 0 → 100644; view file @ 0a2fecdf)

(diff collapsed)
pytorch_transformers/convert_tf_checkpoint_to_pytorch.py  (view file @ 0a2fecdf)

@@ -47,7 +47,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--bert_config_file",
                         default=None,
                         type=str,
...
pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py  (view file @ 0a2fecdf)

@@ -24,11 +24,10 @@ from io import open
 import torch

 import pytorch_transformers.tokenization_transfo_xl as data_utils
-from pytorch_transformers.modeling_transfo_xl import (CONFIG_NAME, WEIGHTS_NAME,
-                                                      TransfoXLConfig, TransfoXLLMHeadModel,
-                                                      load_tf_weights_in_transfo_xl)
+from pytorch_transformers import CONFIG_NAME, WEIGHTS_NAME
+from pytorch_transformers.modeling_transfo_xl import (TransfoXLConfig, TransfoXLLMHeadModel,
+                                                      load_tf_weights_in_transfo_xl)
 from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME, VOCAB_FILES_NAMES)

 if sys.version_info[0] == 2:
...
@@ -76,7 +75,7 @@ def convert_transfo_xl_checkpoint_to_pytorch(tf_checkpoint_path,
     if transfo_xl_config_file == "":
         config = TransfoXLConfig()
     else:
-        config = TransfoXLConfig(transfo_xl_config_file)
+        config = TransfoXLConfig.from_json_file(transfo_xl_config_file)
     print("Building PyTorch model from configuration: {}".format(str(config)))
     model = TransfoXLLMHeadModel(config)
...
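The import shuffle reflects that CONFIG_NAME and WEIGHTS_NAME are generic serialization constants (defined in modeling_utils and re-exported at the package top level, as the __init__.py hunk above shows), not per-model values. If memory serves, they name the standard files written by save_pretrained():

from pytorch_transformers import CONFIG_NAME, WEIGHTS_NAME

print(CONFIG_NAME)   # expected: 'config.json'
print(WEIGHTS_NAME)  # expected: 'pytorch_model.bin'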
pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py  (view file @ 0a2fecdf)

@@ -36,7 +36,7 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p
     model = chkpt['model']

     config = chkpt['params']
-    config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.Tensor, numpy.ndarray)))
+    config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))

     vocab = chkpt['dico_word2id']
     vocab = dict((s + '</w>' if s.find('@@') == -1 and i > 13 else s.replace('@@', ''), i) for s, i in vocab.items())
...
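The one-word change narrows the type filter that separates hyperparameters from weight blobs when reading chkpt['params'] from a fairseq-style XLM checkpoint: only CPU float tensors (and NumPy arrays) are now dropped from the config dict, rather than every torch.Tensor. A toy sketch of the filtering step (the checkpoint layout here is illustrative):

import numpy
import torch

# Stand-in for chkpt['params']: plain scalars are real hyperparameters,
# tensor/array values are weight-like blobs that must not reach the config.
params = {
    'emb_dim': 2048,
    'n_layers': 12,
    'weight_blob': torch.zeros(3),   # CPU float32 tensor, matches torch.FloatTensor
    'array_blob': numpy.zeros(3),    # ndarray, filtered out as well
}
config = dict((n, v) for n, v in params.items()
              if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))
print(sorted(config))  # ['emb_dim', 'n_layers']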
pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py  (view file @ 0a2fecdf)

(diff collapsed)
pytorch_transformers/file_utils.py  (view file @ 0a2fecdf)

(diff collapsed)
pytorch_transformers/modeling_auto.py  (new file, 0 → 100644; view file @ 0a2fecdf)

(diff collapsed)
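modeling_auto.py is new in this commit (+250 lines) but its diff is collapsed on this page. Judging from the AutoConfig/AutoModel exports added to __init__.py above, it implements name-based dispatch; a simplified sketch of that pattern follows (an assumption about the module's shape, not its actual contents, which also cover the config machinery):

from pytorch_transformers import (BertModel, GPT2Model, OpenAIGPTModel,
                                  RobertaModel, TransfoXLModel, XLMModel, XLNetModel)

class AutoModelSketch(object):
    """Pick a concrete base model class from a substring of the checkpoint name."""

    @classmethod
    def from_pretrained(cls, name, *args, **kwargs):
        # Order matters: 'roberta' must be checked before 'bert'.
        dispatch = [('roberta', RobertaModel), ('bert', BertModel),
                    ('openai-gpt', OpenAIGPTModel), ('gpt2', GPT2Model),
                    ('transfo-xl', TransfoXLModel), ('xlnet', XLNetModel),
                    ('xlm', XLMModel)]
        for key, model_class in dispatch:
            if key in name:
                return model_class.from_pretrained(name, *args, **kwargs)
        raise ValueError("Unrecognized model identifier: {}".format(name))

model = AutoModelSketch.from_pretrained('bert-base-cased')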