chenpangpang / transformers · Commits · 0a2fecdf

Unverified commit 0a2fecdf, authored Aug 30, 2019 by Thomas Wolf; committed by GitHub, Aug 30, 2019.

    Merge branch 'master' into master

Parents: 39eb31e1, e0caab0c
Changes: 98. Showing 20 changed files with 841 additions and 382 deletions (+841, −382).
Files changed:

    examples/single_model_scripts/run_openai_gpt.py                   +1   −1
    examples/test_examples.py                                         +1   −1
    examples/utils_glue.py                                            +20  −9
    hubconfs/bert_hubconf.py                                          +104 −104
    hubconfs/gpt2_hubconf.py                                          +42  −42
    hubconfs/gpt_hubconf.py                                           +39  −39
    hubconfs/transformer_xl_hubconf.py                                +35  −35
    hubconfs/xlm_hubconf.py                                           +40  −40
    hubconfs/xlnet_hubconf.1.py                                       +42  −42
    pytorch_transformers/__init__.py                                  +24  −13
    pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py        +2   −2
    pytorch_transformers/convert_openai_checkpoint_to_pytorch.py      +2   −2
    pytorch_transformers/convert_pytorch_checkpoint_to_tf.py          +21  −21
    pytorch_transformers/convert_roberta_checkpoint_to_pytorch.py     +181 −0
    pytorch_transformers/convert_tf_checkpoint_to_pytorch.py          +1   −1
    pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py  +5   −6
    pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py         +1   −1
    pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py       +1   −1
    pytorch_transformers/file_utils.py                                +29  −22
    pytorch_transformers/modeling_auto.py                             +250 −0
examples/single_model_scripts/run_openai_gpt.py

```diff
@@ -205,7 +205,7 @@ def main():
     param_optimizer = list(model.named_parameters())
     no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
     optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
```
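For context: the script builds two AdamW parameter groups so that biases and LayerNorm parameters are excluded from weight decay, and this hunk just swaps the hard-coded 0.01 for the `--weight_decay` argument. A minimal runnable sketch of the pattern (the `Toy` module and the hyperparameter values are illustrative, not part of the patch; the script itself uses pytorch_transformers' AdamW, which takes the same arguments):

```python
import torch.nn as nn
from torch.optim import AdamW

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.dense = nn.Linear(4, 4)
        self.LayerNorm = nn.LayerNorm(4)

model = Toy()
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    # decayed group: parameters whose name matches none of the no_decay substrings
    {'params': [p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},  # after this patch: args.weight_decay
    # undecayed group: biases and LayerNorm parameters
    {'params': [p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = AdamW(optimizer_grouped_parameters, lr=6.25e-5, eps=1e-8)
```

Pinning the second group to `weight_decay=0.0` regardless of the flag reflects the usual practice that decaying biases and LayerNorm weights tends to hurt rather than help.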
examples/test_examples.py

```diff
@@ -81,7 +81,7 @@ class ExamplesTests(unittest.TestCase):
             "--do_train",
             "--do_eval",
             "--version_2_with_negative",
-            "--learning_rate=1e-4",
+            "--learning_rate=2e-4",
             "--per_gpu_train_batch_size=2",
             "--per_gpu_eval_batch_size=1",
             "--overwrite_output_dir",
```
examples/utils_glue.py

```diff
@@ -390,10 +390,16 @@ class WnliProcessor(DataProcessor):
 def convert_examples_to_features(examples, label_list, max_seq_length,
                                  tokenizer, output_mode,
-                                 cls_token_at_end=False, pad_on_left=False,
-                                 cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
-                                 sequence_a_segment_id=0, sequence_b_segment_id=1,
-                                 cls_token_segment_id=1, pad_token_segment_id=0,
+                                 cls_token_at_end=False,
+                                 cls_token='[CLS]',
+                                 cls_token_segment_id=1,
+                                 sep_token='[SEP]',
+                                 sep_token_extra=False,
+                                 pad_on_left=False,
+                                 pad_token=0,
+                                 pad_token_segment_id=0,
+                                 sequence_a_segment_id=0,
+                                 sequence_b_segment_id=1,
                                  mask_padding_with_zero=True):
     """ Loads a data file into a list of `InputBatch`s
         `cls_token_at_end` define the location of the CLS token:
```

```diff
@@ -416,12 +422,14 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
             tokens_b = tokenizer.tokenize(example.text_b)
             # Modifies `tokens_a` and `tokens_b` in place so that the total
             # length is less than the specified length.
-            # Account for [CLS], [SEP], [SEP] with "- 3"
-            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
+            # Account for [CLS], [SEP], [SEP] with "- 3". " -4" for RoBERTa.
+            special_tokens_count = 4 if sep_token_extra else 3
+            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
         else:
-            # Account for [CLS] and [SEP] with "- 2"
-            if len(tokens_a) > max_seq_length - 2:
-                tokens_a = tokens_a[:(max_seq_length - 2)]
+            # Account for [CLS] and [SEP] with "- 2" and with "- 3" for RoBERTa.
+            special_tokens_count = 3 if sep_token_extra else 2
+            if len(tokens_a) > max_seq_length - special_tokens_count:
+                tokens_a = tokens_a[:(max_seq_length - special_tokens_count)]
         # The convention in BERT is:
         # (a) For sequence pairs:
```

```diff
@@ -442,6 +450,9 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
         # used as as the "sentence vector". Note that this only makes sense because
         # the entire model is fine-tuned.
         tokens = tokens_a + [sep_token]
+        if sep_token_extra:
+            # roberta uses an extra separator b/w pairs of sentences
+            tokens += [sep_token]
         segment_ids = [sequence_a_segment_id] * len(tokens)
         if tokens_b:
```
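All three hunks serve one purpose: RoBERTa places two separator tokens between paired segments, so `sep_token_extra=True` must both shrink the truncation budget by one and emit the extra separator. An illustrative sketch of the resulting layouts (the helper name and toy tokens are mine, not from the patch):

```python
def pair_layout(tokens_a, tokens_b, cls_token, sep_token, sep_token_extra=False):
    # Truncation budget mirrors the patched code: CLS + SEP (+ extra SEP) + SEP
    special_tokens_count = 4 if sep_token_extra else 3
    tokens = tokens_a + [sep_token]
    if sep_token_extra:
        tokens += [sep_token]          # extra separator between the pair
    tokens += tokens_b + [sep_token]
    return special_tokens_count, [cls_token] + tokens

# BERT-style, 3 specials:    ['[CLS]', 'a1', 'a2', '[SEP]', 'b1', '[SEP]']
print(pair_layout(['a1', 'a2'], ['b1'], '[CLS]', '[SEP]'))
# RoBERTa-style, 4 specials: ['<s>', 'a1', 'a2', '</s>', '</s>', 'b1', '</s>']
print(pair_layout(['a1', 'a2'], ['b1'], '<s>', '</s>', sep_token_extra=True))
```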
hubconfs/bert_hubconf.py

The first hunk fixes a typo in the shared docstring. Every remaining hunk in this file makes one mechanical change, removing the `>>> ` doctest prompts from the executable lines of the Example blocks (comment and output lines are untouched); those hunks are shown below with the prompts already removed.

```diff
@@ -37,7 +37,7 @@ bert_docstring = """
                 checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models
             will be cached.
-        state_dict: an optional state dictionnary
+        state_dict: an optional state dictionary
             (collections.OrderedDict object) to use instead of Google
             pre-trained models
         *inputs, **kwargs: additional input for the specific Bert class
```

```
@@ -84,12 +84,12 @@ def bertTokenizer(*args, **kwargs):
                   Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
    Example:
        import torch
        sentence = 'Hello, World!'
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        toks = tokenizer.tokenize(sentence)
        ['Hello', '##,', 'World', '##!']
        ids = tokenizer.convert_tokens_to_ids(toks)
        [8667, 28136, 1291, 28125]
    """
    tokenizer = BertTokenizer.from_pretrained(*args, **kwargs)
```

```
@@ -105,20 +105,20 @@ def bertModel(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        # Prepare tokenized input
        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        tokens_tensor = torch.tensor([indexed_tokens])
        segments_tensors = torch.tensor([segments_ids])
        # Load bertModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
        model.eval()
        # Predict hidden states features for each layer
        with torch.no_grad():
                encoded_layers, _ = model(tokens_tensor, segments_tensors)
    """
    model = BertModel.from_pretrained(*args, **kwargs)
```

```
@@ -134,20 +134,20 @@ def bertForNextSentencePrediction(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        # Prepare tokenized input
        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        tokens_tensor = torch.tensor([indexed_tokens])
        segments_tensors = torch.tensor([segments_ids])
        # Load bertForNextSentencePrediction
        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
        model.eval()
        # Predict the next sentence classification logits
        with torch.no_grad():
                next_sent_classif_logits = model(tokens_tensor, segments_tensors)
    """
    model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs)
```

```
@@ -164,17 +164,17 @@ def bertForPreTraining(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        # Prepare tokenized input
        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        tokenized_text = tokenizer.tokenize(text)
        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        tokens_tensor = torch.tensor([indexed_tokens])
        segments_tensors = torch.tensor([segments_ids])
        # Load bertForPreTraining
        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
        masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
    """
    model = BertForPreTraining.from_pretrained(*args, **kwargs)
    return model
```

```
@@ -188,25 +188,25 @@ def bertForMaskedLM(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        # Prepare tokenized input
        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        tokenized_text = tokenizer.tokenize(text)
        masked_index = 8
        tokenized_text[masked_index] = '[MASK]'
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        tokens_tensor = torch.tensor([indexed_tokens])
        segments_tensors = torch.tensor([segments_ids])
        # Load bertForMaskedLM
        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
        model.eval()
        # Predict all tokens
        with torch.no_grad():
                predictions = model(tokens_tensor, segments_tensors)
        predicted_index = torch.argmax(predictions[0, masked_index]).item()
        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
        'henson'
    """
    model = BertForMaskedLM.from_pretrained(*args, **kwargs)
```

```
@@ -230,24 +230,24 @@ def bertForSequenceClassification(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        # Prepare tokenized input
        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        tokens_tensor = torch.tensor([indexed_tokens])
        segments_tensors = torch.tensor([segments_ids])
        # Load bertForSequenceClassification
        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
        model.eval()
        # Predict the sequence classification logits
        with torch.no_grad():
                seq_classif_logits = model(tokens_tensor, segments_tensors)
        # Or get the sequence classification loss
        labels = torch.tensor([1])
        seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
    """
    model = BertForSequenceClassification.from_pretrained(*args, **kwargs)
    return model
```

```
@@ -265,24 +265,24 @@ def bertForMultipleChoice(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        # Prepare tokenized input
        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
        segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
        # Load bertForMultipleChoice
        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
        model.eval()
        # Predict the multiple choice logits
        with torch.no_grad():
                multiple_choice_logits = model(tokens_tensor, segments_tensors)
        # Or get the multiple choice loss
        labels = torch.tensor([1])
        multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
    """
    model = BertForMultipleChoice.from_pretrained(*args, **kwargs)
    return model
```

```
@@ -298,25 +298,25 @@ def bertForQuestionAnswering(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        # Prepare tokenized input
        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        tokens_tensor = torch.tensor([indexed_tokens])
        segments_tensors = torch.tensor([segments_ids])
        # Load bertForQuestionAnswering
        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
        model.eval()
        # Predict the start and end positions logits
        with torch.no_grad():
                start_logits, end_logits = model(tokens_tensor, segments_tensors)
        # Or get the total loss which is the sum of the CrossEntropy loss for the start and end token positions
        start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
        # set model.train() before if training this loss
        multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
    """
    model = BertForQuestionAnswering.from_pretrained(*args, **kwargs)
    return model
```

```
@@ -337,24 +337,24 @@ def bertForTokenClassification(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        # Prepare tokenized input
        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        tokens_tensor = torch.tensor([indexed_tokens])
        segments_tensors = torch.tensor([segments_ids])
        # Load bertForTokenClassification
        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
        model.eval()
        # Predict the token classification logits
        with torch.no_grad():
                classif_logits = model(tokens_tensor, segments_tensors)
        # Or get the token classification loss
        labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
        classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
    """
    model = BertForTokenClassification.from_pretrained(*args, **kwargs)
    return model
```
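All of these entry points share one shape: a module-level function that forwards `*args, **kwargs` to the matching `from_pretrained` and returns the result, which is exactly what `torch.hub.load(repo, entry_point, ...)` needs to resolve them by name. A rough sketch of how such a `hubconf.py` is wired (the `dependencies` list and the pared-down bodies are assumptions for illustration, not the file's actual content):

```python
# hubconf.py (sketch), placed at the repository root so torch.hub can import it
dependencies = ['torch', 'tqdm', 'boto3', 'requests', 'regex']  # assumed dependency list

from pytorch_transformers import BertModel, BertTokenizer

def bertTokenizer(*args, **kwargs):
    # torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased')
    # resolves this function by name and passes the trailing arguments through.
    return BertTokenizer.from_pretrained(*args, **kwargs)

def bertModel(*args, **kwargs):
    return BertModel.from_pretrained(*args, **kwargs)
```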
hubconfs/gpt2_hubconf.py

Every hunk in this file makes the same change as above: the `>>> ` doctest prompts are removed from the executable Example lines. Shown post-change:

```
@@ -52,11 +52,11 @@ def gpt2Tokenizer(*args, **kwargs):
                Default: None
    Example:
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
        text = "Who was Jim Henson ?"
        indexed_tokens = tokenizer.encode(tokenized_text)
    """
    tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
    return tokenizer
```

```
@@ -71,24 +71,24 @@ def gpt2Model(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
        # Prepare tokenized input
        text_1 = "Who was Jim Henson ?"
        text_2 = "Jim Henson was a puppeteer"
        indexed_tokens_1 = tokenizer.encode(text_1)
        indexed_tokens_2 = tokenizer.encode(text_2)
        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
        # Load gpt2Model
        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
        model.eval()
        # Predict hidden states features for each layer
        # past can be used to reuse precomputed hidden state in a subsequent predictions
        with torch.no_grad():
                hidden_states_1, past = model(tokens_tensor_1)
                hidden_states_2, past = model(tokens_tensor_2, past=past)
    """
```

```
@@ -104,31 +104,31 @@ def gpt2LMHeadModel(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
        # Prepare tokenized input
        text_1 = "Who was Jim Henson ?"
        text_2 = "Jim Henson was a puppeteer"
        indexed_tokens_1 = tokenizer.encode(text_1)
        indexed_tokens_2 = tokenizer.encode(text_2)
        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
        # Load gpt2LMHeadModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
        model.eval()
        # Predict hidden states features for each layer
        # past can be used to reuse precomputed hidden state in a subsequent predictions
        with torch.no_grad():
                predictions_1, past = model(tokens_tensor_1)
                predictions_2, past = model(tokens_tensor_2, past=past)
        # Get the predicted last token
        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
        predicted_token = tokenizer.decode([predicted_index])
        assert predicted_token == ' who'
    """
    model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
    return model
```

```
@@ -143,25 +143,25 @@ def gpt2DoubleHeadsModel(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
        # Prepare tokenized input
        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
        tokenized_text1 = tokenizer.tokenize(text1)
        tokenized_text2 = tokenizer.tokenize(text2)
        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
        # Load gpt2DoubleHeadsModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
        model.eval()
        # Predict hidden states features for each layer
        with torch.no_grad():
                lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids)
    """
    model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs)
```
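The `past` value that both gpt2 examples thread through is a per-layer key/value cache: the second forward pass only has to encode the new tokens. A hedged sketch of greedy continuation built on the 1.x call signature shown above (`logits, past = model(input_ids, past=past)`); the helper itself is illustrative:

```python
import torch

def continue_text(model, tokenizer, prompt, steps=5):
    # assumes `model` and `tokenizer` were loaded via the gpt2 entry points above
    input_ids = torch.tensor([tokenizer.encode(prompt)])
    past = None
    with torch.no_grad():
        for _ in range(steps):
            logits, past = model(input_ids, past=past)
            next_id = torch.argmax(logits[0, -1, :]).item()
            input_ids = torch.tensor([[next_id]])  # only the new token is fed next time
            prompt += tokenizer.decode([next_id])
    return prompt
```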
hubconfs/gpt_hubconf.py

The first hunk fixes the same docstring typo as in bert_hubconf.py; the remaining hunks remove the `>>> ` doctest prompts from the Example lines and are shown post-change.

```diff
@@ -40,7 +40,7 @@ gpt_docstring = """
             . a series of NumPy files containing OpenAI TensorFlow trained weights
         from_tf: should we load the weights from a locally saved TensorFlow checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-        state_dict: an optional state dictionnary (collections.OrderedDict object)
+        state_dict: an optional state dictionary (collections.OrderedDict object)
             to use instead of pre-trained models
         *inputs, **kwargs: additional input for the specific OpenAI-GPT class
 """
```

```
@@ -76,12 +76,12 @@ def openAIGPTTokenizer(*args, **kwargs):
                Default: None
    Example:
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        [763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483]
    """
    tokenizer = OpenAIGPTTokenizer.from_pretrained(*args, **kwargs)
```

```
@@ -97,21 +97,21 @@ def openAIGPTModel(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
        # Prepare tokenized input
        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        tokens_tensor = torch.tensor([indexed_tokens])
        # Load openAIGPTModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
        model.eval()
        # Predict hidden states features for each layer
        with torch.no_grad():
                hidden_states = model(tokens_tensor)
    """
    model = OpenAIGPTModel.from_pretrained(*args, **kwargs)
```

```
@@ -126,26 +126,26 @@ def openAIGPTLMHeadModel(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
        # Prepare tokenized input
        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        tokens_tensor = torch.tensor([indexed_tokens])
        # Load openAIGPTLMHeadModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
        model.eval()
        # Predict hidden states features for each layer
        with torch.no_grad():
                predictions = model(tokens_tensor)
        # Get the predicted last token
        predicted_index = torch.argmax(predictions[0, -1, :]).item()
        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
        '.</w>'
    """
    model = OpenAIGPTLMHeadModel.from_pretrained(*args, **kwargs)
```

```
@@ -161,25 +161,25 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
        # Prepare tokenized input
        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
        tokenized_text1 = tokenizer.tokenize(text1)
        tokenized_text2 = tokenizer.tokenize(text2)
        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
        # Load openAIGPTDoubleHeadsModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
        model.eval()
        # Predict hidden states features for each layer
        with torch.no_grad():
                lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids)
    """
    model = OpenAIGPTDoubleHeadsModel.from_pretrained(*args, **kwargs)
```
hubconfs/transformer_xl_hubconf.py

Same pattern again: one docstring typo fix, then `>>> ` prompt removal throughout the Example blocks (shown post-change).

```diff
@@ -23,7 +23,7 @@ transformer_xl_docstring = """
             . `model.chkpt` a TensorFlow checkpoint
         from_tf: should we load the weights from a locally saved TensorFlow checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-        state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
+        state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
         *inputs, **kwargs: additional input for the specific TransformerXL class
 """
```

```
@@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs):
            * transfo-xl-wt103
    Example:
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
        text = "Who was Jim Henson ?"
        tokenized_text = tokenizer.tokenize(tokenized_text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    """
    tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
    return tokenizer
```

```
@@ -63,26 +63,26 @@ def transformerXLModel(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
        # Prepare tokenized input
        text_1 = "Who was Jim Henson ?"
        text_2 = "Jim Henson was a puppeteer"
        tokenized_text_1 = tokenizer.tokenize(text_1)
        tokenized_text_2 = tokenizer.tokenize(text_2)
        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
        # Load transformerXLModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
        model.eval()
        # Predict hidden states features for each layer
        # We can re-use the memory cells in a subsequent call to attend a longer context
        with torch.no_grad():
                hidden_states_1, mems_1 = model(tokens_tensor_1)
                hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
    """
```

```
@@ -98,33 +98,33 @@ def transformerXLLMHeadModel(*args, **kwargs):
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
        # Prepare tokenized input
        text_1 = "Who was Jim Henson ?"
        text_2 = "Jim Henson was a puppeteer"
        tokenized_text_1 = tokenizer.tokenize(text_1)
        tokenized_text_2 = tokenizer.tokenize(text_2)
        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
        # Load transformerXLLMHeadModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
        model.eval()
        # Predict hidden states features for each layer
        # We can re-use the memory cells in a subsequent call to attend a longer context
        with torch.no_grad():
                predictions_1, mems_1 = model(tokens_tensor_1)
                predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
        # Get the predicted last token
        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
        assert predicted_token == 'who'
    """
    model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
    return model
```
hubconfs/xlm_hubconf.py

All hunks in this file are the same `>>> ` prompt removal, including inside the commented-out example block at the end; shown post-change.

```
@@ -17,16 +17,16 @@ xlm_start_docstring = """
    Example:
        # Load the tokenizer
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
        # Prepare tokenized input
        text_1 = "Who was Jim Henson ?"
        text_2 = "Jim Henson was a puppeteer"
        indexed_tokens_1 = tokenizer.encode(text_1)
        indexed_tokens_2 = tokenizer.encode(text_2)
        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
"""

# A lot of models share the same param doc. Use a decorator
```

```
@@ -76,11 +76,11 @@ def xlmTokenizer(*args, **kwargs):
                Default: None
    Example:
        import torch
        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
        text = "Who was Jim Henson ?"
        indexed_tokens = tokenizer.encode(tokenized_text)
    """
    tokenizer = XLMTokenizer.from_pretrained(*args, **kwargs)
    return tokenizer
```

```
@@ -91,11 +91,11 @@ def xlmTokenizer(*args, **kwargs):
def xlmModel(*args, **kwargs):
    """
        # Load xlmModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
        model.eval()
        # Predict hidden states features for each layer
        with torch.no_grad():
                hidden_states_1, mems = model(tokens_tensor_1)
                hidden_states_2, mems = model(tokens_tensor_2, past=mems)
    """
```

```
@@ -108,26 +108,26 @@ def xlmModel(*args, **kwargs):
def xlmLMHeadModel(*args, **kwargs):
    """
        # Prepare tokenized input
        text_1 = "Who was Jim Henson ?"
        text_2 = "Jim Henson was a puppeteer"
        indexed_tokens_1 = tokenizer.encode(text_1)
        indexed_tokens_2 = tokenizer.encode(text_2)
        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
        # Load xlnetLMHeadModel
        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
        model.eval()
        # Predict hidden states features for each layer
        with torch.no_grad():
                predictions_1, mems = model(tokens_tensor_1)
                predictions_2, mems = model(tokens_tensor_2, mems=mems)
        # Get the predicted last token
        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
        predicted_token = tokenizer.decode([predicted_index])
        assert predicted_token == ' who'
    """
    model = XLMWithLMHeadModel.from_pretrained(*args, **kwargs)
    return model
```

```
@@ -142,25 +142,25 @@ def xlmLMHeadModel(*args, **kwargs):
#    Example:
#        # Load the tokenizer
#        import torch
#        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
#        # Prepare tokenized input
#        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
#        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
#        tokenized_text1 = tokenizer.tokenize(text1)
#        tokenized_text2 = tokenizer.tokenize(text2)
#        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
#        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
#        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
#        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
#        # Load xlnetForSequenceClassification
#        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
#        model.eval()
#        # Predict sequence classes logits
#        with torch.no_grad():
#                lm_logits, mems = model(tokens_tensor)
#    """
#    model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
```
hubconfs/xlnet_hubconf.1.py
View file @
0a2fecdf
...
@@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs):
...
@@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs):
Default: None
Default: None
Example:
Example:
>>>
import torch
import torch
>>>
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
>>>
text = "Who was Jim Henson ?"
text = "Who was Jim Henson ?"
>>>
indexed_tokens = tokenizer.encode(tokenized_text)
indexed_tokens = tokenizer.encode(tokenized_text)
"""
"""
tokenizer
=
XLNetTokenizer
.
from_pretrained
(
*
args
,
**
kwargs
)
tokenizer
=
XLNetTokenizer
.
from_pretrained
(
*
args
,
**
kwargs
)
return
tokenizer
return
tokenizer
...
@@ -72,23 +72,23 @@ def xlnetModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems = model(tokens_tensor_1)
                 hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """
...
@@ -106,30 +106,30 @@ def xlnetLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems = model(tokens_tensor_1)
                 predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = XLNetLMHeadModel.from_pretrained(*args, **kwargs)
     return model
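The closing assert in that example checks a single step of greedy decoding. The same pattern extends to a loop for multi-token continuation; a minimal sketch under the same setup (loop length and variable names are ours):

generated = tokens_tensor_1
with torch.no_grad():
    for _ in range(5):  # greedily append five tokens
        predictions, mems = model(generated)
        next_token = torch.argmax(predictions[0, -1, :]).reshape(1, 1)
        generated = torch.cat([generated, next_token], dim=1)
print(tokenizer.decode(generated[0].tolist()))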
...
@@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs):
 #        Example:
 #            # Load the tokenizer
-#            >>> import torch
-#            >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+#            import torch
+#            tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
 #            # Prepare tokenized input
-#            >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-#            >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-#            >>> tokenized_text1 = tokenizer.tokenize(text1)
-#            >>> tokenized_text2 = tokenizer.tokenize(text2)
-#            >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-#            >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-#            >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-#            >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+#            text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+#            text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+#            tokenized_text1 = tokenizer.tokenize(text1)
+#            tokenized_text2 = tokenizer.tokenize(text2)
+#            indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+#            indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+#            tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+#            mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 #            # Load xlnetForSequenceClassification
-#            >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
-#            >>> model.eval()
+#            model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
+#            model.eval()
 #            # Predict sequence classes logits
-#            >>> with torch.no_grad():
+#            with torch.no_grad():
 #                    lm_logits, mems = model(tokens_tensor)
 #        """
 #        model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
...
pytorch_transformers/__init__.py
-__version__ = "1.0.0"
+__version__ = "1.1.0"
+from .tokenization_auto import AutoTokenizer
 from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
 from .tokenization_openai import OpenAIGPTTokenizer
 from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
 from .tokenization_gpt2 import GPT2Tokenizer
 from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
 from .tokenization_xlm import XLMTokenizer
-from .tokenization_utils import (PreTrainedTokenizer, clean_up_tokenization)
-from .modeling_bert import (BertConfig, BertModel, BertForPreTraining,
-                            BertForMaskedLM, BertForNextSentencePrediction,
-                            BertForSequenceClassification, BertForMultipleChoice,
-                            BertForTokenClassification, BertForQuestionAnswering,
-                            load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
-                            BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)
-from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTModel,
+from .tokenization_roberta import RobertaTokenizer
+from .tokenization_distilbert import DistilBertTokenizer
+from .tokenization_utils import (PreTrainedTokenizer)
+
+from .modeling_auto import (AutoConfig, AutoModel)
+
+from .modeling_bert import (BertConfig, BertPreTrainedModel, BertModel, BertForPreTraining,
+                            BertForMaskedLM, BertForNextSentencePrediction,
+                            BertForSequenceClassification, BertForMultipleChoice,
+                            BertForTokenClassification, BertForQuestionAnswering,
+                            load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+                            BERT_PRETRAINED_CONFIG_ARCHIVE_MAP)
+from .modeling_openai import (OpenAIGPTConfig, OpenAIGPTPreTrainedModel, OpenAIGPTModel,
                               OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel,
                               load_tf_weights_in_openai_gpt, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
                               OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel,
+from .modeling_transfo_xl import (TransfoXLConfig, TransfoXLPreTrainedModel, TransfoXLModel, TransfoXLLMHeadModel,
                                   load_tf_weights_in_transfo_xl, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
                                   TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_gpt2 import (GPT2Config, GPT2Model,
+from .modeling_gpt2 import (GPT2Config, GPT2PreTrainedModel, GPT2Model,
                             GPT2LMHeadModel, GPT2DoubleHeadsModel,
                             load_tf_weights_in_gpt2, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
                             GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
...
@@ -29,14 +35,19 @@ from .modeling_xlnet import (XLNetConfig,
                              XLNetForSequenceClassification, XLNetForQuestionAnswering,
                              load_tf_weights_in_xlnet, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
                              XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
-from .modeling_xlm import (XLMConfig, XLMModel,
+from .modeling_xlm import (XLMConfig, XLMPreTrainedModel, XLMModel,
                            XLMWithLMHeadModel, XLMForSequenceClassification,
                            XLMForQuestionAnswering, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
                            XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
+from .modeling_roberta import (RobertaConfig, RobertaForMaskedLM, RobertaModel, RobertaForSequenceClassification,
+                               ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
+from .modeling_distilbert import (DistilBertConfig, DistilBertForMaskedLM, DistilBertModel,
+                                  DistilBertForSequenceClassification, DistilBertForQuestionAnswering,
+                                  DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
 from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
                              PretrainedConfig, PreTrainedModel, prune_layer, Conv1D)
 from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule, WarmupCosineSchedule,
                            WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
-from .file_utils import (PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
+from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
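The headline addition in this file is the Auto* family, which dispatches on the checkpoint name to the matching architecture. A minimal sketch of how the new exports fit together (the checkpoint name is just an illustration):

import torch
from pytorch_transformers import AutoTokenizer, AutoModel

# The Auto classes inspect the name ('bert-...', 'roberta-...', 'gpt2', ...)
# and instantiate the matching tokenizer and model classes.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')
model.eval()

input_ids = torch.tensor([tokenizer.encode("Who was Jim Henson ?")])
with torch.no_grad():
    last_hidden_state = model(input_ids)[0]  # (batch, sequence length, hidden size)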
pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py
...
@@ -35,7 +35,7 @@ def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, p
     if gpt2_config_file == "":
         config = GPT2Config()
     else:
-        config = GPT2Config(gpt2_config_file)
+        config = GPT2Config.from_json_file(gpt2_config_file)
     model = GPT2Model(config)
     # Load weights from numpy
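For reference, a minimal sketch of the corrected call, mirroring the script's logic (the JSON path is purely illustrative, not a file shipped with the repository):

from pytorch_transformers import GPT2Config, GPT2Model

# from_json_file is the explicit classmethod for building a config
# from a hyperparameter JSON on disk.
config = GPT2Config.from_json_file('/path/to/gpt2_config.json')
model = GPT2Model(config)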
...
@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default = None,
                         type = str,
                         required = True,
-                        help = "Path the TensorFlow checkpoint path.")
+                        help = "Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default = None,
                         type = str,
...
pytorch_transformers/convert_openai_checkpoint_to_pytorch.py
...
@@ -35,7 +35,7 @@ def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_c
     if openai_config_file == "":
         config = OpenAIGPTConfig()
     else:
-        config = OpenAIGPTConfig(openai_config_file)
+        config = OpenAIGPTConfig.from_json_file(openai_config_file)
     model = OpenAIGPTModel(config)
     # Load weights from numpy
...
@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default = None,
                         type = str,
                         required = True,
-                        help = "Path the TensorFlow checkpoint path.")
+                        help = "Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default = None,
                         type = str,
...
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py
This diff is collapsed.
pytorch_transformers/convert_roberta_checkpoint_to_pytorch.py (new file, mode 100644)
This diff is collapsed.
pytorch_transformers/convert_tf_checkpoint_to_pytorch.py
...
@@ -47,7 +47,7 @@ if __name__ == "__main__":
                         default = None,
                         type = str,
                         required = True,
-                        help = "Path the TensorFlow checkpoint path.")
+                        help = "Path to the TensorFlow checkpoint path.")
     parser.add_argument("--bert_config_file",
                         default = None,
                         type = str,
...
pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py
...
@@ -24,11 +24,10 @@ from io import open
 import torch
 import pytorch_transformers.tokenization_transfo_xl as data_utils
-from pytorch_transformers.modeling_transfo_xl import (CONFIG_NAME, WEIGHTS_NAME,
-                                                      TransfoXLConfig,
+from pytorch_transformers import CONFIG_NAME, WEIGHTS_NAME
+from pytorch_transformers.modeling_transfo_xl import (TransfoXLConfig,
                                                       TransfoXLLMHeadModel,
                                                       load_tf_weights_in_transfo_xl)
 from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME, VOCAB_FILES_NAMES)
 if sys.version_info[0] == 2:
...
@@ -76,7 +75,7 @@ def convert_transfo_xl_checkpoint_to_pytorch(tf_checkpoint_path,
     if transfo_xl_config_file == "":
         config = TransfoXLConfig()
     else:
-        config = TransfoXLConfig(transfo_xl_config_file)
+        config = TransfoXLConfig.from_json_file(transfo_xl_config_file)
     print("Building PyTorch model from configuration: {}".format(str(config)))
     model = TransfoXLLMHeadModel(config)
...
pytorch_transformers/convert_xlm_checkpoint_to_pytorch.py
...
@@ -36,7 +36,7 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p
     model = chkpt['model']
     config = chkpt['params']
-    config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.Tensor, numpy.ndarray)))
+    config = dict((n, v) for n, v in config.items() if not isinstance(v, (torch.FloatTensor, numpy.ndarray)))
     vocab = chkpt['dico_word2id']
     vocab = dict((s + '</w>' if s.find('@@') == -1 and i > 13 else s.replace('@@', ''), i) for s, i in vocab.items())
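The vocabulary remapping on the last line above is easy to misread, so here is a toy illustration with a made-up three-entry vocab: '@@' is the BPE continuation marker for a subword glued to the next one, '</w>' is the tokenizer's end-of-word marker, and ids 13 and below (presumably the reserved special symbols) are left untouched:

chkpt_vocab = {'<s>': 0, 'hen@@': 14, 'son': 15}
vocab = dict((s + '</w>' if s.find('@@') == -1 and i > 13 else s.replace('@@', ''), i)
             for s, i in chkpt_vocab.items())
assert vocab == {'<s>': 0, 'hen': 14, 'son</w>': 15}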
...
pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py
This diff is collapsed.
pytorch_transformers/file_utils.py
This diff is collapsed.
pytorch_transformers/modeling_auto.py (new file, mode 100644)
This diff is collapsed.