Commit d2cc6b10, authored Aug 08, 2019 by LysandreJik

Merge branch 'master' into RoBERTa

Parents: 39d72bcc, 7729ef73
Changes: 62. Showing 20 changed files with 662 additions and 425 deletions (+662, -425).
Files shown:

    examples/run_glue.py                                                +6    -0
    examples/run_squad.py                                               +7    -1
    examples/single_model_scripts/run_openai_gpt.py                     +1    -1
    hubconfs/bert_hubconf.py                                          +104  -104
    hubconfs/gpt2_hubconf.py                                           +42   -42
    hubconfs/gpt_hubconf.py                                            +39   -39
    hubconfs/transformer_xl_hubconf.py                                 +35   -35
    hubconfs/xlm_hubconf.py                                            +40   -40
    hubconfs/xlnet_hubconf.1.py                                        +42   -42
    pytorch_transformers/__init__.py                                    +6    -1
    pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py          +1    -1
    pytorch_transformers/convert_openai_checkpoint_to_pytorch.py        +1    -1
    pytorch_transformers/convert_pytorch_checkpoint_to_tf.py           +21   -21
    pytorch_transformers/convert_tf_checkpoint_to_pytorch.py            +1    -1
    pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py    +4    -5
    pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py         +1    -1
    pytorch_transformers/file_utils.py                                  +9    -6
    pytorch_transformers/modeling_auto.py                             +236    -0
    pytorch_transformers/modeling_bert.py                              +48   -63
    pytorch_transformers/modeling_gpt2.py                              +18   -21
examples/run_glue.py

@@ -247,6 +247,9 @@ def evaluate(args, model, tokenizer, prefix=""):

 def load_and_cache_examples(args, task, tokenizer, evaluate=False):
+    if args.local_rank not in [-1, 0]:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training process the dataset, and the others will use the cache
+
     processor = processors[task]()
     output_mode = output_modes[task]
     # Load data features from cache or dataset file

@@ -273,6 +276,9 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
         logger.info("Saving features into cached file %s", cached_features_file)
         torch.save(features, cached_features_file)

+    if args.local_rank == 0:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training process the dataset, and the others will use the cache
+
     # Convert to Tensors and build dataset
     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
     all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
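The two new barrier() calls implement a rendezvous: only the first local process builds the feature cache while the others wait, and everyone else then reads from the cache. A minimal, self-contained sketch of the pattern (the build_features helper and the cache path are illustrative assumptions, not part of this commit; it assumes torch.distributed has already been initialized):

    import os
    import torch
    import torch.distributed as dist

    def load_features(local_rank, cache_path="cached_features.pt"):
        # Everyone except the first local process blocks here while rank 0
        # does the (expensive) preprocessing and writes the cache.
        if local_rank not in [-1, 0]:
            dist.barrier()

        if os.path.exists(cache_path):
            features = torch.load(cache_path)
        else:
            features = build_features()  # hypothetical preprocessing step
            torch.save(features, cache_path)

        # Rank 0 reaches this second barrier only after the cache exists,
        # releasing the waiting processes, which then hit the cached file.
        if local_rank == 0:
            dist.barrier()

        return features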
examples/run_squad.py

@@ -138,7 +138,7 @@ def train(args, train_dataset, model, tokenizer):
                       'end_positions': batch[4]}
             if args.model_type in ['xlnet', 'xlm']:
                 inputs.update({'cls_index': batch[5],
-                               'p_mask': batch[6]})
+                               'p_mask': batch[6]})
             outputs = model(**inputs)
             loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)

@@ -272,6 +272,9 @@ def evaluate(args, model, tokenizer, prefix=""):

 def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
+    if args.local_rank not in [-1, 0]:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training process the dataset, and the others will use the cache
+
     # Load data features from cache or dataset file
     input_file = args.predict_file if evaluate else args.train_file
     cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format(

@@ -296,6 +299,9 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
         logger.info("Saving features into cached file %s", cached_features_file)
         torch.save(features, cached_features_file)

+    if args.local_rank == 0:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training process the dataset, and the others will use the cache
+
     # Convert to Tensors and build dataset
     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
     all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
examples/single_model_scripts/run_openai_gpt.py

@@ -205,7 +205,7 @@ def main():
     param_optimizer = list(model.named_parameters())
     no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
     optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
         ]
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
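The change above replaces a hard-coded weight decay of 0.01 with the script's --weight_decay argument. The grouping idiom itself keeps decay off biases and LayerNorm parameters by substring-matching parameter names; a standalone sketch on a toy module (the module and the hyperparameter values are illustrative, not taken from the script):

    import torch.nn as nn
    from torch.optim import AdamW  # pytorch_transformers also ships its own AdamW

    class Toy(nn.Module):
        def __init__(self):
            super(Toy, self).__init__()
            self.dense = nn.Linear(16, 16)     # parameter names: dense.weight, dense.bias
            self.LayerNorm = nn.LayerNorm(16)  # parameter names: LayerNorm.weight, LayerNorm.bias

    model = Toy()
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']  # matched as substrings of the names

    grouped = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},   # only dense.weight lands here
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},    # biases and LayerNorm parameters are not decayed
    ]
    optimizer = AdamW(grouped, lr=5e-5, eps=1e-8)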
hubconfs/bert_hubconf.py

@@ -37,7 +37,7 @@ bert_docstring = """
             checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models
             will be cached.
-        state_dict: an optional state dictionnary
+        state_dict: an optional state dictionary
             (collections.OrderedDict object) to use instead of Google
             pre-trained models
         *inputs, **kwargs: additional input for the specific Bert class

@@ -84,12 +84,12 @@ def bertTokenizer(*args, **kwargs):
                 Default: ["[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]"]
     Example:
-        >>> import torch
-        >>> sentence = 'Hello, World!'
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
-        >>> toks = tokenizer.tokenize(sentence)
+        import torch
+        sentence = 'Hello, World!'
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        toks = tokenizer.tokenize(sentence)
         ['Hello', '##,', 'World', '##!']
-        >>> ids = tokenizer.convert_tokens_to_ids(toks)
+        ids = tokenizer.convert_tokens_to_ids(toks)
         [8667, 28136, 1291, 28125]
     """
     tokenizer = BertTokenizer.from_pretrained(*args, **kwargs)

@@ -105,20 +105,20 @@ def bertModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 encoded_layers, _ = model(tokens_tensor, segments_tensors)
     """
     model = BertModel.from_pretrained(*args, **kwargs)

@@ -134,20 +134,20 @@ def bertForNextSentencePrediction(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForNextSentencePrediction
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForNextSentencePrediction', 'bert-base-cased')
+        model.eval()
         # Predict the next sentence classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 next_sent_classif_logits = model(tokens_tensor, segments_tensors)
     """
     model = BertForNextSentencePrediction.from_pretrained(*args, **kwargs)

@@ -164,17 +164,17 @@ def bertForPreTraining(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForPreTraining
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
-        >>> masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForPreTraining', 'bert-base-cased')
+        masked_lm_logits_scores, seq_relationship_logits = model(tokens_tensor, segments_tensors)
     """
     model = BertForPreTraining.from_pretrained(*args, **kwargs)
     return model

@@ -188,25 +188,25 @@ def bertForMaskedLM(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> masked_index = 8
-        >>> tokenized_text[masked_index] = '[MASK]'
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        masked_index = 8
+        tokenized_text[masked_index] = '[MASK]'
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForMaskedLM
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMaskedLM', 'bert-base-cased')
+        model.eval()
         # Predict all tokens
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions = model(tokens_tensor, segments_tensors)
-        >>> predicted_index = torch.argmax(predictions[0, masked_index]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        predicted_index = torch.argmax(predictions[0, masked_index]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
         'henson'
     """
     model = BertForMaskedLM.from_pretrained(*args, **kwargs)

@@ -230,24 +230,24 @@ def bertForSequenceClassification(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForSequenceClassification
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForSequenceClassification', 'bert-base-cased', num_labels=2)
+        model.eval()
         # Predict the sequence classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 seq_classif_logits = model(tokens_tensor, segments_tensors)
         # Or get the sequence classification loss
-        >>> labels = torch.tensor([1])
-        >>> seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
+        labels = torch.tensor([1])
+        seq_classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
     """
     model = BertForSequenceClassification.from_pretrained(*args, **kwargs)
     return model

@@ -265,24 +265,24 @@ def bertForMultipleChoice(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
-        >>> segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens, indexed_tokens]).unsqueeze(0)
+        segments_tensors = torch.tensor([segments_ids, segments_ids]).unsqueeze(0)
        # Load bertForMultipleChoice
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForMultipleChoice', 'bert-base-cased', num_choices=2)
+        model.eval()
         # Predict the multiple choice logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 multiple_choice_logits = model(tokens_tensor, segments_tensors)
         # Or get the multiple choice loss
-        >>> labels = torch.tensor([1])
-        >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
+        labels = torch.tensor([1])
+        multiple_choice_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
     """
     model = BertForMultipleChoice.from_pretrained(*args, **kwargs)
     return model

@@ -298,25 +298,25 @@ def bertForQuestionAnswering(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForQuestionAnswering
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForQuestionAnswering', 'bert-base-cased')
+        model.eval()
         # Predict the start and end positions logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 start_logits, end_logits = model(tokens_tensor, segments_tensors)
         # Or get the total loss which is the sum of the CrossEntropy loss for the start and end token positions
-        >>> start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
+        start_positions, end_positions = torch.tensor([12]), torch.tensor([14])
         # set model.train() before if training this loss
-        >>> multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
+        multiple_choice_loss = model(tokens_tensor, segments_tensors, start_positions=start_positions, end_positions=end_positions)
     """
     model = BertForQuestionAnswering.from_pretrained(*args, **kwargs)
     return model

@@ -337,24 +337,24 @@ def bertForTokenClassification(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
         # Prepare tokenized input
-        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
-        >>> segments_tensors = torch.tensor([segments_ids])
+        text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+        tokens_tensor = torch.tensor([indexed_tokens])
+        segments_tensors = torch.tensor([segments_ids])
         # Load bertForTokenClassification
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'bertForTokenClassification', 'bert-base-cased', num_labels=2)
+        model.eval()
         # Predict the token classification logits
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 classif_logits = model(tokens_tensor, segments_tensors)
         # Or get the token classification loss
-        >>> labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
-        >>> classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
+        labels = torch.tensor([[0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0]])
+        classif_loss = model(tokens_tensor, segments_tensors, labels=labels) # set model.train() before if training this loss
     """
     model = BertForTokenClassification.from_pretrained(*args, **kwargs)
     return model
hubconfs/gpt2_hubconf.py

@@ -52,11 +52,11 @@ def gpt2Tokenizer(*args, **kwargs):
             Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = GPT2Tokenizer.from_pretrained(*args, **kwargs)
     return tokenizer

@@ -71,24 +71,24 @@ def gpt2Model(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load gpt2Model
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Model', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
         # past can be used to reuse precomputed hidden state in a subsequent predictions
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, past = model(tokens_tensor_1)
                 hidden_states_2, past = model(tokens_tensor_2, past=past)
     """

@@ -104,31 +104,31 @@ def gpt2LMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load gpt2LMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2LMHeadModel', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
         # past can be used to reuse precomputed hidden state in a subsequent predictions
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, past = model(tokens_tensor_1)
                 predictions_2, past = model(tokens_tensor_2, past=past)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = GPT2LMHeadModel.from_pretrained(*args, **kwargs)
     return model

@@ -143,25 +143,25 @@ def gpt2DoubleHeadsModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'gpt2Tokenizer', 'gpt2')
         # Prepare tokenized input
-        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-        >>> tokenized_text1 = tokenizer.tokenize(text1)
-        >>> tokenized_text2 = tokenizer.tokenize(text2)
-        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+        tokenized_text1 = tokenizer.tokenize(text1)
+        tokenized_text2 = tokenizer.tokenize(text2)
+        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
         # Load gpt2DoubleHeadsModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'gpt2DoubleHeadsModel', 'gpt2')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 lm_logits, multiple_choice_logits, presents = model(tokens_tensor, mc_token_ids)
     """
     model = GPT2DoubleHeadsModel.from_pretrained(*args, **kwargs)
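The `past` value mentioned in the comments above caches each layer's key/value states so a follow-up call does not recompute the earlier context. The same flow with the library API directly rather than torch.hub, as a sketch using the 1.0-era signatures shown in this diff (downloads weights on first run):

    import torch
    from pytorch_transformers import GPT2Tokenizer
    from pytorch_transformers.modeling_gpt2 import GPT2LMHeadModel

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    model.eval()

    with torch.no_grad():
        ids_1 = torch.tensor([tokenizer.encode("Who was Jim Henson ?")])
        predictions_1, past = model(ids_1)             # past caches the key/value states
        ids_2 = torch.tensor([tokenizer.encode("Jim Henson was a puppeteer")])
        predictions_2, past = model(ids_2, past=past)  # reuses the cached context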
hubconfs/gpt_hubconf.py

@@ -40,7 +40,7 @@ gpt_docstring = """
             . a series of NumPy files containing OpenAI TensorFlow trained weights
         from_tf: should we load the weights from a locally saved TensorFlow checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-        state_dict: an optional state dictionnary (collections.OrderedDict object)
+        state_dict: an optional state dictionary (collections.OrderedDict object)
             to use instead of pre-trained models
         *inputs, **kwargs: additional input for the specific OpenAI-GPT class
 """

@@ -76,12 +76,12 @@ def openAIGPTTokenizer(*args, **kwargs):
             Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
         [763, 509, 4265, 2298, 945, 257, 4265, 2298, 945, 509, 246, 10148, 39041, 483]
     """
     tokenizer = OpenAIGPTTokenizer.from_pretrained(*args, **kwargs)

@@ -97,21 +97,21 @@ def openAIGPTModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        tokens_tensor = torch.tensor([indexed_tokens])
         # Load openAIGPTModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states = model(tokens_tensor)
     """
     model = OpenAIGPTModel.from_pretrained(*args, **kwargs)

@@ -126,26 +126,26 @@ def openAIGPTLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> tokenized_text = tokenizer.tokenize(text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
-        >>> tokens_tensor = torch.tensor([indexed_tokens])
+        text = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        tokenized_text = tokenizer.tokenize(text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        tokens_tensor = torch.tensor([indexed_tokens])
         # Load openAIGPTLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTLMHeadModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions = model(tokens_tensor)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions[0, -1, :]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        predicted_index = torch.argmax(predictions[0, -1, :]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
         '.</w>'
     """
     model = OpenAIGPTLMHeadModel.from_pretrained(*args, **kwargs)

@@ -161,25 +161,25 @@ def openAIGPTDoubleHeadsModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt')
         # Prepare tokenized input
-        >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-        >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-        >>> tokenized_text1 = tokenizer.tokenize(text1)
-        >>> tokenized_text2 = tokenizer.tokenize(text2)
-        >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-        >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-        >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-        >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+        text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+        text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+        tokenized_text1 = tokenizer.tokenize(text1)
+        tokenized_text2 = tokenizer.tokenize(text2)
+        indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+        indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+        tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+        mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
         # Load openAIGPTDoubleHeadsModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTDoubleHeadsModel', 'openai-gpt')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 lm_logits, multiple_choice_logits = model(tokens_tensor, mc_token_ids)
     """
     model = OpenAIGPTDoubleHeadsModel.from_pretrained(*args, **kwargs)
hubconfs/transformer_xl_hubconf.py

@@ -23,7 +23,7 @@ transformer_xl_docstring = """
             . `model.chkpt` a TensorFlow checkpoint
         from_tf: should we load the weights from a locally saved TensorFlow checkpoint
         cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-        state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
+        state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
         *inputs, **kwargs: additional input for the specific TransformerXL class
 """

@@ -45,12 +45,12 @@ def transformerXLTokenizer(*args, **kwargs):
         * transfo-xl-wt103
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
-        >>> text = "Who was Jim Henson ?"
-        >>> tokenized_text = tokenizer.tokenize(tokenized_text)
-        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
+        text = "Who was Jim Henson ?"
+        tokenized_text = tokenizer.tokenize(tokenized_text)
+        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
     """
     tokenizer = TransfoXLTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer

@@ -63,26 +63,26 @@ def transformerXLModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
-        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
-        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
-        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        tokenized_text_1 = tokenizer.tokenize(text_1)
+        tokenized_text_2 = tokenizer.tokenize(text_2)
+        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load transformerXLModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLModel', 'transfo-xl-wt103')
+        model.eval()
         # Predict hidden states features for each layer
         # We can re-use the memory cells in a subsequent call to attend a longer context
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems_1 = model(tokens_tensor_1)
                 hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
     """

@@ -98,33 +98,33 @@ def transformerXLLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> tokenized_text_1 = tokenizer.tokenize(text_1)
-        >>> tokenized_text_2 = tokenizer.tokenize(text_2)
-        >>> indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
-        >>> indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        tokenized_text_1 = tokenizer.tokenize(text_1)
+        tokenized_text_2 = tokenizer.tokenize(text_2)
+        indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
+        indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load transformerXLLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLLMHeadModel', 'transfo-xl-wt103')
+        model.eval()
         # Predict hidden states features for each layer
         # We can re-use the memory cells in a subsequent call to attend a longer context
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems_1 = model(tokens_tensor_1)
                 predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
-        >>> assert predicted_token == 'who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
+        assert predicted_token == 'who'
     """
     model = TransfoXLLMHeadModel.from_pretrained(*args, **kwargs)
     return model
hubconfs/xlm_hubconf.py

@@ -17,16 +17,16 @@ xlm_start_docstring = """
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
 """
 # A lot of models share the same param doc. Use a decorator

@@ -76,11 +76,11 @@ def xlmTokenizer(*args, **kwargs):
             Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlmTokenizer', 'xlm-mlm-en-2048')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = XLMTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer

@@ -91,11 +91,11 @@ def xlmTokenizer(*args, **kwargs):
 def xlmModel(*args, **kwargs):
     """
         # Load xlmModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlmModel', 'xlm-mlm-en-2048')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems = model(tokens_tensor_1)
                 hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """

@@ -108,26 +108,26 @@ def xlmModel(*args, **kwargs):
 def xlmLMHeadModel(*args, **kwargs):
     """
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlm-mlm-en-2048')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems = model(tokens_tensor_1)
                 predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = XLMWithLMHeadModel.from_pretrained(*args, **kwargs)
     return model

@@ -142,25 +142,25 @@ def xlmLMHeadModel(*args, **kwargs):
 #     Example:
 #         # Load the tokenizer
-#         >>> import torch
-#         >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
+#         import torch
+#         tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlm-mlm-en-2048')
 #         # Prepare tokenized input
-#         >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-#         >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-#         >>> tokenized_text1 = tokenizer.tokenize(text1)
-#         >>> tokenized_text2 = tokenizer.tokenize(text2)
-#         >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-#         >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-#         >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-#         >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+#         text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+#         text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+#         tokenized_text1 = tokenizer.tokenize(text1)
+#         tokenized_text2 = tokenizer.tokenize(text2)
+#         indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+#         indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+#         tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+#         mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 #         # Load xlnetForSequenceClassification
-#         >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
-#         >>> model.eval()
+#         model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlm-mlm-en-2048')
+#         model.eval()
 #         # Predict sequence classes logits
-#         >>> with torch.no_grad():
+#         with torch.no_grad():
 #                 lm_logits, mems = model(tokens_tensor)
 #     """
 #     model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
hubconfs/xlnet_hubconf.1.py

@@ -53,11 +53,11 @@ def xlnetTokenizer(*args, **kwargs):
             Default: None
     Example:
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
-        >>> text = "Who was Jim Henson ?"
-        >>> indexed_tokens = tokenizer.encode(tokenized_text)
+        text = "Who was Jim Henson ?"
+        indexed_tokens = tokenizer.encode(tokenized_text)
     """
     tokenizer = XLNetTokenizer.from_pretrained(*args, **kwargs)
     return tokenizer

@@ -72,23 +72,23 @@ def xlnetModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 hidden_states_1, mems = model(tokens_tensor_1)
                 hidden_states_2, mems = model(tokens_tensor_2, past=mems)
     """

@@ -106,30 +106,30 @@ def xlnetLMHeadModel(*args, **kwargs):
     Example:
         # Load the tokenizer
-        >>> import torch
-        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+        import torch
+        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
         # Prepare tokenized input
-        >>> text_1 = "Who was Jim Henson ?"
-        >>> text_2 = "Jim Henson was a puppeteer"
-        >>> indexed_tokens_1 = tokenizer.encode(text_1)
-        >>> indexed_tokens_2 = tokenizer.encode(text_2)
-        >>> tokens_tensor_1 = torch.tensor([indexed_tokens_1])
-        >>> tokens_tensor_2 = torch.tensor([indexed_tokens_2])
+        text_1 = "Who was Jim Henson ?"
+        text_2 = "Jim Henson was a puppeteer"
+        indexed_tokens_1 = tokenizer.encode(text_1)
+        indexed_tokens_2 = tokenizer.encode(text_2)
+        tokens_tensor_1 = torch.tensor([indexed_tokens_1])
+        tokens_tensor_2 = torch.tensor([indexed_tokens_2])
         # Load xlnetLMHeadModel
-        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
-        >>> model.eval()
+        model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetLMHeadModel', 'xlnet-large-cased')
+        model.eval()
         # Predict hidden states features for each layer
-        >>> with torch.no_grad():
+        with torch.no_grad():
                 predictions_1, mems = model(tokens_tensor_1)
                 predictions_2, mems = model(tokens_tensor_2, mems=mems)
         # Get the predicted last token
-        >>> predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
-        >>> predicted_token = tokenizer.decode([predicted_index])
-        >>> assert predicted_token == ' who'
+        predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
+        predicted_token = tokenizer.decode([predicted_index])
+        assert predicted_token == ' who'
     """
     model = XLNetLMHeadModel.from_pretrained(*args, **kwargs)
     return model

@@ -144,25 +144,25 @@ def xlnetLMHeadModel(*args, **kwargs):
 #     Example:
 #         # Load the tokenizer
-#         >>> import torch
-#         >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
+#         import torch
+#         tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'xlnetTokenizer', 'xlnet-large-cased')
 #         # Prepare tokenized input
-#         >>> text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
-#         >>> text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
-#         >>> tokenized_text1 = tokenizer.tokenize(text1)
-#         >>> tokenized_text2 = tokenizer.tokenize(text2)
-#         >>> indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
-#         >>> indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
-#         >>> tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
-#         >>> mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
+#         text1 = "Who was Jim Henson ? Jim Henson was a puppeteer"
+#         text2 = "Who was Jim Henson ? Jim Henson was a mysterious young man"
+#         tokenized_text1 = tokenizer.tokenize(text1)
+#         tokenized_text2 = tokenizer.tokenize(text2)
+#         indexed_tokens1 = tokenizer.convert_tokens_to_ids(tokenized_text1)
+#         indexed_tokens2 = tokenizer.convert_tokens_to_ids(tokenized_text2)
+#         tokens_tensor = torch.tensor([[indexed_tokens1, indexed_tokens2]])
+#         mc_token_ids = torch.LongTensor([[len(tokenized_text1)-1, len(tokenized_text2)-1]])
 #         # Load xlnetForSequenceClassification
-#         >>> model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
-#         >>> model.eval()
+#         model = torch.hub.load('huggingface/pytorch-transformers', 'xlnetForSequenceClassification', 'xlnet-large-cased')
+#         model.eval()
 #         # Predict sequence classes logits
-#         >>> with torch.no_grad():
+#         with torch.no_grad():
 #                 lm_logits, mems = model(tokens_tensor)
 #     """
 #     model = XLNetForSequenceClassification.from_pretrained(*args, **kwargs)
pytorch_transformers/__init__.py

 __version__ = "1.0.0"
+from .tokenization_auto import AutoTokenizer
 from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
 from .tokenization_openai import OpenAIGPTTokenizer
 from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)

@@ -8,6 +9,10 @@ from .tokenization_xlm import XLMTokenizer
 from .tokenization_roberta import RobertaTokenizer
-from .tokenization_utils import (PreTrainedTokenizer, clean_up_tokenization)
+from .tokenization_utils import (PreTrainedTokenizer)
+
+from .modeling_auto import (AutoConfig, AutoModel)
 from .modeling_bert import (BertConfig, BertPreTrainedModel, BertModel, BertForPreTraining,
                             BertForMaskedLM, BertForNextSentencePrediction,
                             BertForSequenceClassification, BertForMultipleChoice,

@@ -42,4 +47,4 @@ from .modeling_utils import (WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME,
 from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule, WarmupCosineSchedule,
                            WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
-from .file_utils import (PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
+from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE, cached_path)
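The new AutoTokenizer / AutoModel exports give a single entry point that picks the right concrete class from the pretrained model name. A hedged usage sketch (1.0-era API; the dispatch-by-name behavior is summarized from this commit, not quoted):

    from pytorch_transformers import AutoTokenizer, AutoModel

    # The Auto classes inspect the model string and route to a concrete class,
    # e.g. 'bert-base-uncased' -> BertTokenizer / BertModel.
    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
    model = AutoModel.from_pretrained('bert-base-uncased')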
pytorch_transformers/convert_gpt2_checkpoint_to_pytorch.py

@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default=None,
                         type=str,
pytorch_transformers/convert_openai_checkpoint_to_pytorch.py

@@ -58,7 +58,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--pytorch_dump_folder_path",
                         default=None,
                         type=str,
pytorch_transformers/convert_pytorch_checkpoint_to_tf.py

@@ -20,7 +20,7 @@ import argparse
 import torch
 import numpy as np
 import tensorflow as tf
-from pytorch_pretrained_bert.modeling import BertModel
+from pytorch_transformers.modeling import BertModel


 def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, model_name: str):

@@ -41,7 +41,7 @@ def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:s
         N BertForQuestionAnswering
     """

-    tensors_to_transopse = (
+    tensors_to_transpose = (
         "dense.weight",
         "attention.self.query",
         "attention.self.key",

@@ -62,34 +62,34 @@ def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:s
     if not os.path.isdir(ckpt_dir):
         os.makedirs(ckpt_dir)

-    session = tf.Session()
     state_dict = model.state_dict()
-    tf_vars = []

     def to_tf_var_name(name: str):
         for patt, repl in iter(var_map):
             name = name.replace(patt, repl)
         return 'bert/{}'.format(name)

-    def assign_tf_var(tensor: np.ndarray, name: str):
-        tmp_var = tf.Variable(initial_value=tensor)
-        tf_var = tf.get_variable(dtype=tmp_var.dtype, shape=tmp_var.shape, name=name)
-        op = tf.assign(ref=tf_var, value=tmp_var)
-        session.run(tf.variables_initializer([tmp_var, tf_var]))
-        session.run(fetches=[op, tf_var])
+    def create_tf_var(tensor: np.ndarray, name: str, session: tf.Session):
+        tf_dtype = tf.dtypes.as_dtype(tensor.dtype)
+        tf_var = tf.get_variable(dtype=tf_dtype, shape=tensor.shape, name=name, initializer=tf.zeros_initializer())
+        session.run(tf.variables_initializer([tf_var]))
+        session.run(tf_var)
+        return tf_var

-    for var_name in state_dict:
-        tf_name = to_tf_var_name(var_name)
-        torch_tensor = state_dict[var_name].numpy()
-        if any([x in var_name for x in tensors_to_transopse]):
-            torch_tensor = torch_tensor.T
-        tf_tensor = assign_tf_var(tensor=torch_tensor, name=tf_name)
-        tf_vars.append(tf_tensor)
-        print("{0}{1}initialized".format(tf_name, " " * (60 - len(tf_name))))
-
-    saver = tf.train.Saver(tf_vars)
-    saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt"))
+    tf.reset_default_graph()
+    with tf.Session() as session:
+        for var_name in state_dict:
+            tf_name = to_tf_var_name(var_name)
+            torch_tensor = state_dict[var_name].numpy()
+            if any([x in var_name for x in tensors_to_transpose]):
+                torch_tensor = torch_tensor.T
+            tf_var = create_tf_var(tensor=torch_tensor, name=tf_name, session=session)
+            tf.keras.backend.set_value(tf_var, torch_tensor)
+            tf_weight = session.run(tf_var)
+            print("Successfully created {}: {}".format(tf_name, np.allclose(tf_weight, torch_tensor)))
+
+        saver = tf.train.Saver(tf.trainable_variables())
+        saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt"))


 def main(raw_args=None):
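The rewritten converter creates each TF variable with a zeros initializer and then overwrites its value via tf.keras.backend.set_value, instead of building a temporary tf.Variable and running tf.assign. A minimal round-trip check of that pattern in isolation (TF 1.x API, matching what the script uses; the variable name is illustrative):

    import numpy as np
    import tensorflow as tf
    import torch

    tf.reset_default_graph()
    with tf.Session() as session:
        t = torch.randn(3, 4).numpy()
        # Create the variable with the right dtype/shape, then set its value.
        var = tf.get_variable(dtype=tf.dtypes.as_dtype(t.dtype), shape=t.shape,
                              name='bert/demo', initializer=tf.zeros_initializer())
        session.run(tf.variables_initializer([var]))
        tf.keras.backend.set_value(var, t)
        assert np.allclose(session.run(var), t)  # the torch weights survived the trip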
pytorch_transformers/convert_tf_checkpoint_to_pytorch.py

@@ -47,7 +47,7 @@ if __name__ == "__main__":
                         default=None,
                         type=str,
                         required=True,
-                        help="Path the TensorFlow checkpoint path.")
+                        help="Path to the TensorFlow checkpoint path.")
     parser.add_argument("--bert_config_file",
                         default=None,
                         type=str,
pytorch_transformers/convert_transfo_xl_checkpoint_to_pytorch.py

@@ -24,11 +24,10 @@ from io import open
 import torch

 import pytorch_transformers.tokenization_transfo_xl as data_utils
-from pytorch_transformers.modeling_transfo_xl import (CONFIG_NAME, WEIGHTS_NAME,
-                                                      TransfoXLConfig, TransfoXLLMHeadModel,
-                                                      load_tf_weights_in_transfo_xl)
+from pytorch_transformers import CONFIG_NAME, WEIGHTS_NAME
+from pytorch_transformers.modeling_transfo_xl import (TransfoXLConfig, TransfoXLLMHeadModel,
+                                                      load_tf_weights_in_transfo_xl)
 from pytorch_transformers.tokenization_transfo_xl import (CORPUS_NAME, VOCAB_FILES_NAMES)

 if sys.version_info[0] == 2:
pytorch_transformers/convert_xlnet_checkpoint_to_pytorch.py
View file @
d2cc6b10
...
...
@@ -79,7 +79,7 @@ if __name__ == "__main__":
                         default = None,
                         type = str,
                         required = True,
-                        help = "Path the TensorFlow checkpoint path.")
+                        help = "Path to the TensorFlow checkpoint path.")
     parser.add_argument("--xlnet_config_file",
                         default = None,
                         type = str,
...
...
pytorch_transformers/file_utils.py
View file @
d2cc6b10
...
...
@@ -38,10 +38,13 @@ except ImportError:
 try:
     from pathlib import Path
-    PYTORCH_PRETRAINED_BERT_CACHE = Path(
-        os.getenv('PYTORCH_PRETRAINED_BERT_CACHE', default_cache_path))
+    PYTORCH_PRETRAINED_BERT_CACHE = Path(
+        os.getenv('PYTORCH_TRANSFORMERS_CACHE',
+                  os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
+                            default_cache_path)))
 except (AttributeError, ImportError):
-    PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
-                                              default_cache_path)
+    PYTORCH_PRETRAINED_BERT_CACHE = os.getenv('PYTORCH_TRANSFORMERS_CACHE',
+                                              os.getenv('PYTORCH_PRETRAINED_BERT_CACHE',
+                                                        default_cache_path))
+
+PYTORCH_TRANSFORMERS_CACHE = PYTORCH_PRETRAINED_BERT_CACHE  # Kept for backward compatibility
 
 logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
...
...
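The net effect is that the new PYTORCH_TRANSFORMERS_CACHE variable takes precedence over the legacy PYTORCH_PRETRAINED_BERT_CACHE. A minimal sketch of overriding the cache location (the path is hypothetical; the variable must be set before the library is imported, since it is read at import time):

import os
os.environ['PYTORCH_TRANSFORMERS_CACHE'] = '/data/hf_cache'  # hypothetical cache path

from pytorch_transformers import file_utils
print(file_utils.PYTORCH_TRANSFORMERS_CACHE)  # -> /data/hf_cache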
@@ -70,7 +73,7 @@ def filename_to_url(filename, cache_dir=None):
     Raise ``EnvironmentError`` if `filename` or its stored metadata do not exist.
     """
     if cache_dir is None:
-        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
+        cache_dir = PYTORCH_TRANSFORMERS_CACHE
     if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
...
...
@@ -98,7 +101,7 @@ def cached_path(url_or_filename, cache_dir=None):
     make sure the file exists and then return the path.
     """
     if cache_dir is None:
-        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
+        cache_dir = PYTORCH_TRANSFORMERS_CACHE
     if sys.version_info[0] == 3 and isinstance(url_or_filename, Path):
         url_or_filename = str(url_or_filename)
     if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
...
...
@@ -187,7 +190,7 @@ def get_from_cache(url, cache_dir=None):
     If it's not there, download it. Then return the path to the cached file.
     """
     if cache_dir is None:
-        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
+        cache_dir = PYTORCH_TRANSFORMERS_CACHE
     if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
     if sys.version_info[0] == 2 and not isinstance(cache_dir, str):
...
...
pytorch_transformers/modeling_auto.py
0 → 100644
View file @
d2cc6b10
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Auto Model class. """
from __future__ import absolute_import, division, print_function, unicode_literals

import logging

import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from torch.nn.parameter import Parameter

from .modeling_bert import BertConfig, BertModel
from .modeling_openai import OpenAIGPTConfig, OpenAIGPTModel
from .modeling_gpt2 import GPT2Config, GPT2Model
from .modeling_transfo_xl import TransfoXLConfig, TransfoXLModel
from .modeling_xlnet import XLNetConfig, XLNetModel
from .modeling_xlm import XLMConfig, XLMModel
from .modeling_utils import PreTrainedModel, SequenceSummary

logger = logging.getLogger(__name__)
class AutoConfig(object):
    r""":class:`~pytorch_transformers.AutoConfig` is a generic configuration class
        that will be instantiated as one of the configuration classes of the library
        when created with the `AutoConfig.from_pretrained(pretrained_model_name_or_path)`
        class method.

        The `from_pretrained()` method takes care of returning the correct configuration class instance
        using pattern matching on the `pretrained_model_name_or_path` string.

        The configuration class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `bert`: BertConfig (Bert model)
            - contains `openai-gpt`: OpenAIGPTConfig (OpenAI GPT model)
            - contains `gpt2`: GPT2Config (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLConfig (Transformer-XL model)
            - contains `xlnet`: XLNetConfig (XLNet model)
            - contains `xlm`: XLMConfig (XLM model)

        This class cannot be instantiated using `__init__()` (it throws an error).
    """
    def __init__(self):
        raise EnvironmentError("AutoConfig is designed to be instantiated "
            "using the `AutoConfig.from_pretrained(pretrained_model_name_or_path)` method.")
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        r""" Instantiate one of the configuration classes of the library
        from a pre-trained model configuration.

        The configuration class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `bert`: BertConfig (Bert model)
            - contains `openai-gpt`: OpenAIGPTConfig (OpenAI GPT model)
            - contains `gpt2`: GPT2Config (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLConfig (Transformer-XL model)
            - contains `xlnet`: XLNetConfig (XLNet model)
            - contains `xlm`: XLMConfig (XLM model)

        Params:
            **pretrained_model_name_or_path**: either:
                - a string with the `shortcut name` of a pre-trained model configuration to load from cache
                  or download and cache if not already stored in cache (e.g. 'bert-base-uncased').
                - a path to a `directory` containing a configuration file saved
                  using the `save_pretrained(save_directory)` method.
                - a path or url to a saved configuration `file`.
            **cache_dir**: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            **return_unused_kwargs**: (`optional`) bool:
                - If False, this function returns just the final configuration object.
                - If True, this function returns a tuple `(config, unused_kwargs)` where `unused_kwargs`
                  is a dictionary consisting of the key/value pairs whose keys are not configuration attributes:
                  i.e. the part of kwargs which has not been used to update `config` and is otherwise ignored.
            **kwargs**: (`optional`) dict:
                Dictionary of key/value pairs with which to update the configuration object after loading.
                - The values in kwargs of any keys which are configuration attributes will be used
                  to override the loaded values.
                - Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                  by the `return_unused_kwargs` keyword parameter.

        Examples::

            config = AutoConfig.from_pretrained('bert-base-uncased')  # Download configuration from S3 and cache.
            config = AutoConfig.from_pretrained('./test/bert_saved_model/')  # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
            config = AutoConfig.from_pretrained('./test/bert_saved_model/my_configuration.json')
            config = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True, foo=False)
            assert config.output_attention == True
            config, unused_kwargs = AutoConfig.from_pretrained('bert-base-uncased', output_attention=True,
                                                               foo=False, return_unused_kwargs=True)
            assert config.output_attention == True
            assert unused_kwargs == {'foo': False}

        """
        if 'bert' in pretrained_model_name_or_path:
            return BertConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'openai-gpt' in pretrained_model_name_or_path:
            return OpenAIGPTConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'gpt2' in pretrained_model_name_or_path:
            return GPT2Config.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'transfo-xl' in pretrained_model_name_or_path:
            return TransfoXLConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'xlnet' in pretrained_model_name_or_path:
            return XLNetConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        elif 'xlm' in pretrained_model_name_or_path:
            return XLMConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)

        raise ValueError("Unrecognized model identifier in {}. Should contain one of "
                         "'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
                         "'xlm'".format(pretrained_model_name_or_path))
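A short sketch of the dispatch above: matching is a plain substring test, evaluated in the listed order, so the first pattern occurring anywhere in the name wins.

config = AutoConfig.from_pretrained('bert-base-uncased')   # 'bert' matches -> BertConfig
config = AutoConfig.from_pretrained('gpt2-medium')         # 'gpt2' matches -> GPT2Config
config = AutoConfig.from_pretrained('xlnet-large-cased')   # 'xlnet' matches -> XLNetConfig

# A name containing none of the six substrings raises the ValueError above:
# AutoConfig.from_pretrained('my-custom-model')  # ValueError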
class AutoModel(object):
    r"""
        :class:`~pytorch_transformers.AutoModel` is a generic model class
        that will be instantiated as one of the base model classes of the library
        when created with the `AutoModel.from_pretrained(pretrained_model_name_or_path)`
        class method.

        The `from_pretrained()` method takes care of returning the correct model class instance
        using pattern matching on the `pretrained_model_name_or_path` string.

        The base model class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `bert`: BertModel (Bert model)
            - contains `openai-gpt`: OpenAIGPTModel (OpenAI GPT model)
            - contains `gpt2`: GPT2Model (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLModel (Transformer-XL model)
            - contains `xlnet`: XLNetModel (XLNet model)
            - contains `xlm`: XLMModel (XLM model)

        This class cannot be instantiated using `__init__()` (it throws an error).
    """
    def __init__(self):
        raise EnvironmentError("AutoModel is designed to be instantiated "
            "using the `AutoModel.from_pretrained(pretrained_model_name_or_path)` method.")
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        r""" Instantiate one of the base model classes of the library
        from a pre-trained model configuration.

        The base model class to instantiate is selected as the first pattern matching
        in the `pretrained_model_name_or_path` string (in the following order):
            - contains `bert`: BertModel (Bert model)
            - contains `openai-gpt`: OpenAIGPTModel (OpenAI GPT model)
            - contains `gpt2`: GPT2Model (OpenAI GPT-2 model)
            - contains `transfo-xl`: TransfoXLModel (Transformer-XL model)
            - contains `xlnet`: XLNetModel (XLNet model)
            - contains `xlm`: XLMModel (XLM model)

        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
        To train the model, you should first set it back in training mode with `model.train()`.

        Params:
            **pretrained_model_name_or_path**: either:
                - a string with the `shortcut name` of a pre-trained model to load from cache
                  or download and cache if not already stored in cache (e.g. 'bert-base-uncased').
                - a path to a `directory` containing a configuration file saved
                  using the `save_pretrained(save_directory)` method.
                - a path or url to a tensorflow index checkpoint `file` (e.g. `./tf_model/model.ckpt.index`).
                  In this case, ``from_tf`` should be set to True and a configuration object should be
                  provided as the `config` argument. This loading option is slower than converting the TensorFlow
                  checkpoint to a PyTorch model using the provided conversion scripts and loading
                  the PyTorch model afterwards.
            **model_args**: (`optional`) Sequence:
                All remaining positional arguments will be passed to the underlying model's __init__ function.
            **config**: an optional configuration for the model to use instead of an automatically loaded configuration.
                Configuration can be automatically loaded when:
                - the model is a model provided by the library (loaded with a `shortcut name` of a pre-trained model), or
                - the model was saved using `save_pretrained(save_directory)` (loaded by supplying the save directory).
            **state_dict**: an optional state dictionary for the model to use instead of a state dictionary loaded
                from a saved weights file.
                This option can be used if you want to create a model from a pretrained configuration but load your own weights.
                In this case though, you should check if using `save_pretrained(dir)` and `from_pretrained(save_directory)` is not
                a simpler option.
            **cache_dir**: (`optional`) string:
                Path to a directory in which a downloaded pre-trained model
                configuration should be cached if the standard cache should not be used.
            **output_loading_info**: (`optional`) boolean:
                Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
            **kwargs**: (`optional`) dict:
                Dictionary of key/value pairs with which to update the configuration object after loading.
                Can be used to override selected configuration parameters. E.g. ``output_attention=True``.
                - If a configuration is provided with `config`, **kwargs will be directly passed
                  to the underlying model's __init__ method.
                - If a configuration is not provided, **kwargs will be first passed to the pretrained
                  model configuration class loading function (`PretrainedConfig.from_pretrained`).
                  Each key of **kwargs that corresponds to a configuration attribute
                  will be used to override said attribute with the supplied **kwargs value.
                  Remaining keys that do not correspond to any configuration attribute will
                  be passed to the underlying model's __init__ function.

        Examples::

            model = AutoModel.from_pretrained('bert-base-uncased')  # Download model and configuration from S3 and cache.
            model = AutoModel.from_pretrained('./test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
            model = AutoModel.from_pretrained('bert-base-uncased', output_attention=True)  # Update configuration during loading
            assert model.config.output_attention == True
            # Loading from a TF checkpoint file instead of a PyTorch model (slower)
            config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
            model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)

        """
        if 'bert' in pretrained_model_name_or_path:
            return BertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'openai-gpt' in pretrained_model_name_or_path:
            return OpenAIGPTModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'gpt2' in pretrained_model_name_or_path:
            return GPT2Model.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'transfo-xl' in pretrained_model_name_or_path:
            return TransfoXLModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'xlnet' in pretrained_model_name_or_path:
            return XLNetModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        elif 'xlm' in pretrained_model_name_or_path:
            return XLMModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)

        raise ValueError("Unrecognized model identifier in {}. Should contain one of "
                         "'bert', 'openai-gpt', 'gpt2', 'transfo-xl', 'xlnet', "
                         "'xlm'".format(pretrained_model_name_or_path))
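The same substring dispatch applies to models. Because the test is a plain `'bert' in name`, any path containing that substring resolves to BertModel; a brief sketch (the local directory is hypothetical):

model = AutoModel.from_pretrained('gpt2')               # -> GPT2Model
model = AutoModel.from_pretrained('transfo-xl-wt103')   # -> TransfoXLModel
model = AutoModel.from_pretrained('./my_bert_model/')   # hypothetical saved dir; 'bert' in the path -> BertModel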
pytorch_transformers/modeling_bert.py
View file @
d2cc6b10
...
...
@@ -222,7 +222,7 @@ class BertConfig(PretrainedConfig):
 try:
     from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
-except ImportError:
+except (ImportError, AttributeError) as e:
     logger.info("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
     class BertLayerNorm(nn.Module):
         def __init__(self, hidden_size, eps=1e-12):
...
...
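The hunk truncates the pure-Python fallback class. For reference, a sketch of a TF-style layer norm matching the signature shown (reconstructed here, not part of the hunk):

class BertLayerNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-12):
        """Layer normalization in the TF style (epsilon inside the square root)."""
        super(BertLayerNorm, self).__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.bias = nn.Parameter(torch.zeros(hidden_size))
        self.variance_epsilon = eps

    def forward(self, x):
        u = x.mean(-1, keepdim=True)               # mean over the hidden dimension
        s = (x - u).pow(2).mean(-1, keepdim=True)  # variance over the hidden dimension
        x = (x - u) / torch.sqrt(s + self.variance_epsilon)
        return self.weight * x + self.bias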
@@ -643,12 +643,11 @@ class BertModel(BertPreTrainedModel):
     Examples::
 
-        >>> config = BertConfig.from_pretrained('bert-base-uncased')
-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>> model = BertModel(config)
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertModel.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
 
     """
 
     def __init__(self, config):
...
...
@@ -754,13 +753,11 @@ class BertForPreTraining(BertPreTrainedModel):
     Examples::
 
-        >>> config = BertConfig.from_pretrained('bert-base-uncased')
-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>>
-        >>> model = BertForPreTraining(config)
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> prediction_scores, seq_relationship_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForPreTraining.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        prediction_scores, seq_relationship_scores = outputs[:2]
 
     """
 
     def __init__(self, config):
...
...
@@ -824,13 +821,11 @@ class BertForMaskedLM(BertPreTrainedModel):
     Examples::
 
-        >>> config = BertConfig.from_pretrained('bert-base-uncased')
-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>>
-        >>> model = BertForMaskedLM(config)
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, masked_lm_labels=input_ids)
-        >>> loss, prediction_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForMaskedLM.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, masked_lm_labels=input_ids)
+        loss, prediction_scores = outputs[:2]
 
     """
 
     def __init__(self, config):
...
...
@@ -857,7 +852,7 @@ class BertForMaskedLM(BertPreTrainedModel):
         sequence_output = outputs[0]
         prediction_scores = self.cls(sequence_output)
 
-        outputs = (prediction_scores,) + outputs[2:]  # Add hidden states and attention is they are here
+        outputs = (prediction_scores,) + outputs[2:]  # Add hidden states and attention if they are here
         if masked_lm_labels is not None:
             loss_fct = CrossEntropyLoss(ignore_index=-1)
             masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
...
...
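The `ignore_index=-1` above is what makes `masked_lm_labels` work as a sparse target: positions labeled -1 contribute nothing to the loss. A minimal sketch (the vocabulary size and token id are illustrative):

import torch
from torch.nn import CrossEntropyLoss

loss_fct = CrossEntropyLoss(ignore_index=-1)
prediction_scores = torch.randn(1, 5, 30522)               # (batch, seq_len, vocab_size)
masked_lm_labels = torch.tensor([[-1, -1, 2157, -1, -1]])  # only position 2 is scored

loss = loss_fct(prediction_scores.view(-1, 30522), masked_lm_labels.view(-1))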
@@ -891,13 +886,11 @@ class BertForNextSentencePrediction(BertPreTrainedModel):
     Examples::
 
-        >>> config = BertConfig.from_pretrained('bert-base-uncased')
-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>>
-        >>> model = BertForNextSentencePrediction(config)
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> seq_relationship_scores = outputs[0]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        seq_relationship_scores = outputs[0]
 
     """
 
     def __init__(self, config):
...
...
@@ -951,14 +944,12 @@ class BertForSequenceClassification(BertPreTrainedModel):
     Examples::
 
-        >>> config = BertConfig.from_pretrained('bert-base-uncased')
-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>>
-        >>> model = BertForSequenceClassification(config)
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, logits = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, logits = outputs[:2]
 
     """
 
     def __init__(self, config):
...
...
@@ -1057,15 +1048,13 @@ class BertForMultipleChoice(BertPreTrainedModel):
     Examples::
 
-        >>> config = BertConfig.from_pretrained('bert-base-uncased')
-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>>
-        >>> model = BertForMultipleChoice(config)
-        >>> choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
-        >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
-        >>> labels = torch.tensor(1).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, classification_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
+        choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
+        input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
+        labels = torch.tensor(1).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, classification_scores = outputs[:2]
 
     """
 
     def __init__(self, config):
...
...
@@ -1127,14 +1116,12 @@ class BertForTokenClassification(BertPreTrainedModel):
     Examples::
 
-        >>> config = BertConfig.from_pretrained('bert-base-uncased')
-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>>
-        >>> model = BertForTokenClassification(config)
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=labels)
-        >>> loss, scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForTokenClassification.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=labels)
+        loss, scores = outputs[:2]
 
     """
 
     def __init__(self, config):
...
...
@@ -1203,15 +1190,13 @@ class BertForQuestionAnswering(BertPreTrainedModel):
     Examples::
 
-        >>> config = BertConfig.from_pretrained('bert-base-uncased')
-        >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        >>>
-        >>> model = BertForQuestionAnswering(config)
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> start_positions = torch.tensor([1])
-        >>> end_positions = torch.tensor([3])
-        >>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
-        >>> loss, start_scores, end_scores = outputs[:2]
+        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        start_positions = torch.tensor([1])
+        end_positions = torch.tensor([3])
+        outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
+        loss, start_scores, end_scores = outputs[:3]
 
     """
 
     def __init__(self, config):
...
...
pytorch_transformers/modeling_gpt2.py
View file @
d2cc6b10
...
...
@@ -137,7 +137,7 @@ class GPT2Config(PretrainedConfig):
                  initializer_range=0.02,
                  num_labels=1,
-                 summary_type='token_ids',
+                 summary_type='cls_index',
                  summary_use_proj=True,
                  summary_activation=None,
                  summary_proj_to_labels=True,
...
...
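The renamed default matches the `SequenceSummary` modes in modeling_utils; assuming the usual semantics, 'cls_index' gathers the hidden state at the position supplied by the caller (e.g. `mc_token_ids` in the GPT2DoubleHeadsModel example below). A quick check:

from pytorch_transformers import GPT2Config

config = GPT2Config()       # summary_type now defaults to 'cls_index'
print(config.summary_type)  # -> 'cls_index'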
@@ -433,12 +433,11 @@ class GPT2Model(GPT2PreTrainedModel):
     Examples::
 
-        >>> config = GPT2Config.from_pretrained('gpt2')
-        >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-        >>> model = GPT2Model(config)
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids)
-        >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
+        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        model = GPT2Model.from_pretrained('gpt2')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids)
+        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
 
     """
 
     def __init__(self, config):
...
...
@@ -567,12 +566,11 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
     Examples::
 
-        >>> config = GPT2Config.from_pretrained('gpt2')
-        >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-        >>> model = GPT2LMHeadModel(config)
-        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, labels=input_ids)
-        >>> loss, logits = outputs[:2]
+        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        model = GPT2LMHeadModel.from_pretrained('gpt2')
+        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, labels=input_ids)
+        loss, logits = outputs[:2]
 
     """
 
     def __init__(self, config):
...
...
@@ -683,14 +681,13 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
     Examples::
 
-        >>> config = GPT2Config.from_pretrained('gpt2')
-        >>> tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-        >>> model = GPT2DoubleHeadsModel(config)
-        >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]  # Assume you've added [CLS] to the vocabulary
-        >>> input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
-        >>> mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0)  # Batch size 1
-        >>> outputs = model(input_ids, mc_token_ids)
-        >>> lm_prediction_scores, mc_prediction_scores = outputs[:2]
+        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        model = GPT2DoubleHeadsModel.from_pretrained('gpt2')
+        choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]  # Assume you've added [CLS] to the vocabulary
+        input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0)  # Batch size 1, 2 choices
+        mc_token_ids = torch.tensor([-1, -1]).unsqueeze(0)  # Batch size 1
+        outputs = model(input_ids, mc_token_ids)
+        lm_prediction_scores, mc_prediction_scores = outputs[:2]
 
     """
 
     def __init__(self, config):
...
...