chenpangpang / transformers · Commits · 292140b9

Unverified commit 292140b9, authored Jul 12, 2019 by Thomas Wolf, committed by GitHub on Jul 12, 2019

Merge pull request #781 from huggingface/embeddings

Clean up input embeddings resizing and weights tying

Parents: 3821ecbf, c57e9d94
Changes: 16 changed files with 925 additions and 911 deletions (+925 −911)

  pytorch_transformers/modeling_bert.py                    +32   −16
  pytorch_transformers/modeling_gpt2.py                    +21   −122
  pytorch_transformers/modeling_openai.py                  +25   −120
  pytorch_transformers/modeling_transfo_xl.py              +21   −3
  pytorch_transformers/modeling_utils.py                   +70   −2
  pytorch_transformers/modeling_xlm.py                     +11   −10
  pytorch_transformers/modeling_xlnet.py                   +14   −9
  pytorch_transformers/tests/modeling_bert_test.py         +47   −39
  pytorch_transformers/tests/modeling_common_test.py       +583  −0
  pytorch_transformers/tests/modeling_gpt2_test.py         +4    −9
  pytorch_transformers/tests/modeling_openai_test.py       +3    −4
  pytorch_transformers/tests/modeling_tests_commons.py     +0    −446
  pytorch_transformers/tests/modeling_transfo_xl_test.py   +29   −27
  pytorch_transformers/tests/modeling_utils_test.py        +0    −47
  pytorch_transformers/tests/modeling_xlm_test.py          +27   −24
  pytorch_transformers/tests/modeling_xlnet_test.py        +38   −33
pytorch_transformers/modeling_bert.py  (+32 −16)

@@ -507,23 +507,17 @@ class BertPredictionHeadTransform(nn.Module):
 class BertLMPredictionHead(nn.Module):
-    def __init__(self, config, bert_model_embedding_weights):
+    def __init__(self, config):
         super(BertLMPredictionHead, self).__init__()
         self.transform = BertPredictionHeadTransform(config)
-        self.torchscript = config.torchscript

         # The output weights are the same as the input embeddings, but there is
         # an output-only bias for each token.
-        self.decoder = nn.Linear(bert_model_embedding_weights.size(1),
-                                 bert_model_embedding_weights.size(0),
+        self.decoder = nn.Linear(config.hidden_size,
+                                 config.vocab_size,
                                  bias=False)

-        if self.torchscript:
-            self.decoder.weight = nn.Parameter(bert_model_embedding_weights.clone())
-        else:
-            self.decoder.weight = bert_model_embedding_weights
-
-        self.bias = nn.Parameter(torch.zeros(bert_model_embedding_weights.size(0)))
+        self.bias = nn.Parameter(torch.zeros(config.vocab_size))

     def forward(self, hidden_states):
         hidden_states = self.transform(hidden_states)

@@ -532,9 +526,9 @@ class BertLMPredictionHead(nn.Module):
 class BertOnlyMLMHead(nn.Module):
-    def __init__(self, config, bert_model_embedding_weights):
+    def __init__(self, config):
         super(BertOnlyMLMHead, self).__init__()
-        self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights)
+        self.predictions = BertLMPredictionHead(config)

     def forward(self, sequence_output):
         prediction_scores = self.predictions(sequence_output)

@@ -552,9 +546,9 @@ class BertOnlyNSPHead(nn.Module):
 class BertPreTrainingHeads(nn.Module):
-    def __init__(self, config, bert_model_embedding_weights):
+    def __init__(self, config):
         super(BertPreTrainingHeads, self).__init__()
-        self.predictions = BertLMPredictionHead(config, bert_model_embedding_weights)
+        self.predictions = BertLMPredictionHead(config)
         self.seq_relationship = nn.Linear(config.hidden_size, 2)

     def forward(self, sequence_output, pooled_output):

@@ -619,6 +613,12 @@ class BertModel(BertPreTrainedModel):
         self.apply(self.init_weights)

+    def _resize_token_embeddings(self, new_num_tokens):
+        old_embeddings = self.embeddings.word_embeddings
+        new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
+        self.embeddings.word_embeddings = new_embeddings
+        return self.embeddings.word_embeddings
+
     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.
             heads_to_prune: dict of {layer_num: list of heads to prune in this layer}

@@ -750,9 +750,17 @@ class BertForPreTraining(BertPreTrainedModel):
         super(BertForPreTraining, self).__init__(config)

         self.bert = BertModel(config)
-        self.cls = BertPreTrainingHeads(config, self.bert.embeddings.word_embeddings.weight)
+        self.cls = BertPreTrainingHeads(config)

         self.apply(self.init_weights)
+        self.tie_weights()
+
+    def tie_weights(self):
+        """ Make sure we are sharing the input and output embeddings.
+            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
+        """
+        self._tie_or_clone_weights(self.cls.predictions.decoder,
+                                   self.bert.embeddings.word_embeddings)

     def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None,
                 next_sentence_label=None, head_mask=None):

@@ -845,9 +853,17 @@ class BertForMaskedLM(BertPreTrainedModel):
         super(BertForMaskedLM, self).__init__(config)

         self.bert = BertModel(config)
-        self.cls = BertOnlyMLMHead(config, self.bert.embeddings.word_embeddings.weight)
+        self.cls = BertOnlyMLMHead(config)

         self.apply(self.init_weights)
+        self.tie_weights()
+
+    def tie_weights(self):
+        """ Make sure we are sharing the input and output embeddings.
+            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
+        """
+        self._tie_or_clone_weights(self.cls.predictions.decoder,
+                                   self.bert.embeddings.word_embeddings)

     def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, head_mask=None):
         """
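The BERT heads above now take only `config` and are wired to the input embeddings through the new `tie_weights()` / `_tie_or_clone_weights()` path (defined in modeling_utils.py further down). A rough standalone sketch of what that helper does, outside the library and with made-up sizes:

    import torch.nn as nn

    def tie_or_clone_weights(output_layer, input_embeddings, torchscript=False):
        # Same rule as the new PreTrainedModel._tie_or_clone_weights: share the
        # Parameter object when possible; TorchScript export cannot serialize
        # shared parameters, so in that case the weights are cloned instead.
        if torchscript:
            output_layer.weight = nn.Parameter(input_embeddings.weight.clone())
        else:
            output_layer.weight = input_embeddings.weight

    embeddings = nn.Embedding(30522, 768)        # BERT-sized vocabulary, for illustration only
    decoder = nn.Linear(768, 30522, bias=False)  # plays the role of cls.predictions.decoder
    tie_or_clone_weights(decoder, embeddings)
    assert decoder.weight is embeddings.weight   # tied: one Parameter, one storage

With tying, gradients from the LM head and from the embedding lookup both flow into the same matrix; the clone branch trades that sharing for TorchScript compatibility.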
pytorch_transformers/modeling_gpt2.py  (+21 −122)

@@ -104,7 +104,6 @@ class GPT2Config(PretrainedConfig):
     Args:
         vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
-        n_special: The number of special tokens to learn during fine-tuning ('[SEP]', '[CLF]', ...)
         n_positions: Number of positional embeddings.
         n_ctx: Size of the causal mask (usually same as n_positions).
         n_embd: Dimensionality of the embeddings and hidden states.

@@ -119,14 +118,12 @@ class GPT2Config(PretrainedConfig):
         embd_pdrop: The dropout ratio for the embeddings.
         initializer_range: The sttdev of the truncated_normal_initializer for
             initializing all weight matrices.
-        predict_special_tokens: should we predict special tokens (when the model has a LM head)
     """
     pretrained_config_archive_map = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(
         self,
         vocab_size_or_config_json_file=50257,
-        n_special=0,
         n_positions=1024,
         n_ctx=1024,
         n_embd=768,

@@ -137,7 +134,6 @@ class GPT2Config(PretrainedConfig):
         attn_pdrop=0.1,
         layer_norm_epsilon=1e-5,
         initializer_range=0.02,
-        predict_special_tokens=True,
         num_labels=1,
         summary_type='token_ids',

@@ -151,7 +147,6 @@ class GPT2Config(PretrainedConfig):
         Args:
             vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file.
-            n_special: The number of special tokens to learn during fine-tuning ('[SEP]', '[CLF]', ...)
             n_positions: Number of positional embeddings.
             n_ctx: Size of the causal mask (usually same as n_positions).
             n_embd: Dimensionality of the embeddings and hidden states.

@@ -166,7 +161,6 @@ class GPT2Config(PretrainedConfig):
             embd_pdrop: The dropout ratio for the embeddings.
             initializer_range: The sttdev of the truncated_normal_initializer for
                 initializing all weight matrices.
-            predict_special_tokens: should we predict special tokens (when the model has a LM head)
         """
         super(GPT2Config, self).__init__(**kwargs)

@@ -178,7 +172,6 @@ class GPT2Config(PretrainedConfig):
             self.__dict__[key] = value
         elif isinstance(vocab_size_or_config_json_file, int):
             self.vocab_size = vocab_size_or_config_json_file
-            self.n_special = n_special
             self.n_ctx = n_ctx
             self.n_positions = n_positions
             self.n_embd = n_embd

@@ -189,7 +182,6 @@ class GPT2Config(PretrainedConfig):
             self.attn_pdrop = attn_pdrop
             self.layer_norm_epsilon = layer_norm_epsilon
             self.initializer_range = initializer_range
-            self.predict_special_tokens = predict_special_tokens
             self.num_labels = num_labels
             self.summary_type = summary_type

@@ -203,10 +195,6 @@ class GPT2Config(PretrainedConfig):
                              "or the path to a pretrained model config file (str)"
                              )

-    @property
-    def total_tokens_embeddings(self):
-        return self.vocab_size + self.n_special
-
     @property
     def hidden_size(self):
         return self.n_embd

@@ -347,34 +335,6 @@ class Block(nn.Module):
         return outputs  # x, present, (attentions)


-class GPT2LMHead(nn.Module):
-    """ Language Model Head for the transformer """
-
-    def __init__(self, model_embeddings_weights, config):
-        super(GPT2LMHead, self).__init__()
-        self.n_embd = config.n_embd
-        self.vocab_size = config.vocab_size
-        self.predict_special_tokens = config.predict_special_tokens
-        self.torchscript = config.torchscript
-        embed_shape = model_embeddings_weights.shape
-        self.decoder = nn.Linear(embed_shape[1], embed_shape[0], bias=False)
-        self.set_embeddings_weights(model_embeddings_weights)
-
-    def set_embeddings_weights(self, model_embeddings_weights, predict_special_tokens=True):
-        self.predict_special_tokens = predict_special_tokens
-        # Export to TorchScript can't handle parameter sharing so we are cloning them.
-        if self.torchscript:
-            self.decoder.weight = nn.Parameter(model_embeddings_weights.clone())
-        else:
-            self.decoder.weight = model_embeddings_weights  # Tied weights
-
-    def forward(self, hidden_state):
-        lm_logits = self.decoder(hidden_state)
-        if not self.predict_special_tokens:
-            lm_logits = lm_logits[..., :self.vocab_size]
-        return lm_logits
-
-
 class GPT2PreTrainedModel(PreTrainedModel):
     """ An abstract class to handle weights initialization and
         a simple interface for dowloading and loading pretrained models.

@@ -400,36 +360,6 @@ class GPT2PreTrainedModel(PreTrainedModel):
             module.bias.data.zero_()
             module.weight.data.fill_(1.0)

-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
-        """
-        Instantiate a GPT2PreTrainedModel from a pre-trained model file or a pytorch state dict.
-        Download and cache the pre-trained model file if needed.
-
-        Params:
-            pretrained_model_name_or_path: either:
-                - a str with the name of a pre-trained model to load selected in the list of:
-                    . `gpt2`
-                - a path or url to a pretrained model archive containing:
-                    . `gpt2_config.json` a configuration file for the model
-                    . `pytorch_model.bin` a PyTorch dump of a GPT2Model instance
-                - a path or url to a pretrained model archive containing:
-                    . `gpt2_config.json` a configuration file for the model
-                    . a TensorFlow checkpoint with trained weights
-            from_tf: should we load the weights from a locally saved TensorFlow checkpoint
-            cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-            state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
-            *inputs, **kwargs: additional input for the specific GPT2 class
-        """
-        num_special_tokens = kwargs.pop('num_special_tokens', None)
-        model = super(GPT2PreTrainedModel, cls).from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-
-        # Add additional embeddings for special tokens if needed
-        # This step also make sure we are still sharing the output and input embeddings after loading weights
-        model.set_num_special_tokens(num_special_tokens)
-        return model
-
-
 class GPT2Model(GPT2PreTrainedModel):
     """OpenAI GPT-2 model ("Language Models are Unsupervised Multitask Learners").

@@ -447,13 +377,13 @@ class GPT2Model(GPT2PreTrainedModel):
             config.vocab_size - 1, ______________________
             config.vocab_size,
             ...                    -> special embeddings
-            config.vocab_size + config.n_special - 1] ______________________
+            config.vocab_size + n_special - 1] ______________________

-        where total_tokens_embeddings can be obtained as config.total_tokens_embeddings and is equal to
+        where total_tokens_embeddings is equal to

         ::

-            total_tokens_embeddings = config.vocab_size + config.n_special
+            total_tokens_embeddings = vocab_size + n_special

         You should use the associated indices to index the embeddings.

@@ -474,7 +404,7 @@ class GPT2Model(GPT2PreTrainedModel):
         self.output_hidden_states = config.output_hidden_states
         self.output_attentions = config.output_attentions

-        self.wte = nn.Embedding(config.total_tokens_embeddings, config.n_embd)
+        self.wte = nn.Embedding(config.vocab_size, config.n_embd)
         self.wpe = nn.Embedding(config.n_positions, config.n_embd)
         self.drop = nn.Dropout(config.embd_pdrop)
         self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])

@@ -482,26 +412,9 @@ class GPT2Model(GPT2PreTrainedModel):
         self.apply(self.init_weights)

-    def set_num_special_tokens(self, num_special_tokens=None):
-        """
-        Update input embeddings with new embedding matrix if needed.
-
-        Args:
-            num_special_tokens: Special tokens to be added to the embedding matrix
-        TODO Lysandre filled args
-        """
-        if num_special_tokens is None or self.config.n_special == num_special_tokens:
-            return
-        # Update config
-        self.config.n_special = num_special_tokens
-        # Build new embeddings and initialize all new embeddings (in particular the special tokens)
-        old_embed = self.wte
-        self.wte = nn.Embedding(self.config.total_tokens_embeddings, self.config.n_embd)
-        self.wte.to(old_embed.weight.device)
-        self.init_weights(self.wte)
-        # Copy word embeddings from the previous weights
-        self.wte.weight.data[:self.config.vocab_size, :] = old_embed.weight.data[:self.config.vocab_size, :]
+    def _resize_token_embeddings(self, new_num_tokens):
+        self.wte = self._get_resized_embeddings(self.wte, new_num_tokens)
+        return self.wte

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.

@@ -641,23 +554,17 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
     def __init__(self, config):
         super(GPT2LMHeadModel, self).__init__(config)
         self.transformer = GPT2Model(config)
-        self.lm_head = GPT2LMHead(self.transformer.wte.weight, config)
+        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

         self.apply(self.init_weights)
+        self.tie_weights()

-    def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
-        """
-        Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings.
-
-        Args:
-            num_special_tokens: Special tokens to be added to the embedding matrix
-            predict_special_tokens: if set to True, the model will try and predict the specified ``num_special_tokens``.
-                Defaults to True.
-        TODO Lysandre filled args
+    def tie_weights(self):
+        """ Make sure we are sharing the input and output embeddings.
+            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
-        self.transformer.set_num_special_tokens(num_special_tokens)
-        self.lm_head.set_embeddings_weights(self.transformer.wte.weight, predict_special_tokens=predict_special_tokens)
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.wte)

     def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, past=None, head_mask=None):
         """

@@ -740,25 +647,17 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
     def __init__(self, config):
         super(GPT2DoubleHeadsModel, self).__init__(config)
         self.transformer = GPT2Model(config)
-        self.lm_head = GPT2LMHead(self.transformer.wte.weight, config)
+        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
         self.multiple_choice_head = SequenceSummary(config)

         self.apply(self.init_weights)
+        self.tie_weights()

-    def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
-        """
-        Update input and output embeddings with new embedding matrix.Make sure we are sharing the embeddings
-
-        Args:
-            num_special_tokens: Special tokens to be added to the embedding matrix
-            predict_special_tokens: if set to True, the model will try and predict the specified ``num_special_tokens``.
-                Defaults to True.
-        TODO Lysandre filled args
+    def tie_weights(self):
+        """ Make sure we are sharing the input and output embeddings.
+            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
-        self.transformer.set_num_special_tokens(num_special_tokens)
-        self.lm_head.set_embeddings_weights(self.transformer.wte.weight, predict_special_tokens=predict_special_tokens)
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.wte)

     def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
                 position_ids=None, past=None, head_mask=None):
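For GPT-2 the commit drops the `GPT2LMHead` wrapper and the whole `n_special` / `set_num_special_tokens` machinery; growing the vocabulary for added tokens now goes through the generic `resize_token_embeddings()` introduced in modeling_utils.py below. A hedged usage sketch of the new call path (the added-token count is arbitrary and not taken from the diff; the old-API comment mirrors the removed `from_pretrained` override):

    from pytorch_transformers import GPT2LMHeadModel

    # Old API removed in this commit:
    #   model = GPT2LMHeadModel.from_pretrained('gpt2', num_special_tokens=3)

    # New API: resize the input embeddings and let tie_weights() re-share them
    # with the lm_head (downloads the pretrained checkpoint on first use).
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    old_vocab = model.config.vocab_size               # 50257 for the released gpt2 checkpoint
    model.resize_token_embeddings(old_vocab + 3)      # e.g. room for three newly added tokens
    assert model.transformer.wte.weight.size(0) == old_vocab + 3
    assert model.lm_head.weight.size(0) == old_vocab + 3   # lm_head re-tied to the resized matrix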
pytorch_transformers/modeling_openai.py  (+25 −120)

@@ -156,7 +156,6 @@ class OpenAIGPTConfig(PretrainedConfig):
     def __init__(
         self,
         vocab_size_or_config_json_file=40478,
-        n_special=0,
         n_positions=512,
         n_ctx=512,
         n_embd=768,

@@ -190,7 +189,6 @@ class OpenAIGPTConfig(PretrainedConfig):
             self.__dict__[key] = value
         elif isinstance(vocab_size_or_config_json_file, int):
             self.vocab_size = vocab_size_or_config_json_file
-            self.n_special = n_special
             self.n_ctx = n_ctx
             self.n_positions = n_positions
             self.n_embd = n_embd

@@ -216,10 +214,6 @@ class OpenAIGPTConfig(PretrainedConfig):
                              "or the path to a pretrained model config file (str)"
                              )

-    @property
-    def total_tokens_embeddings(self):
-        return self.vocab_size + self.n_special
-
     @property
     def hidden_size(self):
         return self.n_embd

@@ -355,34 +349,6 @@ class Block(nn.Module):
         return outputs


-class OpenAIGPTLMHead(nn.Module):
-    """ Language Model Head for the transformer """
-
-    def __init__(self, model_embeddings_weights, config):
-        super(OpenAIGPTLMHead, self).__init__()
-        self.n_embd = config.n_embd
-        self.vocab_size = config.vocab_size
-        self.predict_special_tokens = config.predict_special_tokens
-        self.torchscript = config.torchscript
-        embed_shape = model_embeddings_weights.shape
-        self.decoder = nn.Linear(embed_shape[1], embed_shape[0], bias=False)
-        self.set_embeddings_weights(model_embeddings_weights)
-
-    def set_embeddings_weights(self, model_embeddings_weights, predict_special_tokens=True):
-        self.predict_special_tokens = predict_special_tokens
-        if self.torchscript:
-            self.decoder.weight = nn.Parameter(model_embeddings_weights.clone())
-        else:
-            self.decoder.weight = model_embeddings_weights  # Tied weights
-
-    def forward(self, hidden_state):
-        lm_logits = self.decoder(hidden_state)
-        if not self.predict_special_tokens:
-            lm_logits = lm_logits[..., :self.vocab_size]
-        return lm_logits
-
-
 class OpenAIGPTPreTrainedModel(PreTrainedModel):
     """ An abstract class to handle weights initialization and
         a simple interface for dowloading and loading pretrained models.

@@ -408,36 +374,6 @@ class OpenAIGPTPreTrainedModel(PreTrainedModel):
             module.bias.data.zero_()
             module.weight.data.fill_(1.0)

-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
-        """
-        Instantiate a OpenAIGPTPreTrainedModel from a pre-trained model file or a pytorch state dict.
-        Download and cache the pre-trained model file if needed.
-
-        Params:
-            pretrained_model_name_or_path: either:
-                - a str with the name of a pre-trained model to load selected in the list of:
-                - a path or url to a pretrained model archive containing:
-                    . `config.json` a configuration file for the model
-                    . `pytorch_model.bin` a PyTorch dump of a OpenAIGPTModel instance
-                - a path or url to a pretrained model archive containing:
-                    . `config.json` a configuration file for the model
-                    . a series of NumPy files containing OpenAI TensorFlow trained weights
-            from_tf: should we load the weights from a locally saved TensorFlow checkpoint
-            cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-            state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of pre-trained models
-            *inputs, **kwargs: additional input for the specific OpenAI-GPT class
-        """
-        num_special_tokens = kwargs.get('num_special_tokens', None)
-        kwargs.pop('num_special_tokens', None)
-
-        model = super(OpenAIGPTPreTrainedModel, cls).from_pretrained(pretrained_model_name_or_path,
-                                                                     *inputs, **kwargs)
-
-        # Add additional embeddings for special tokens if needed
-        # This step also make sure we are still sharing the output and input embeddings after loading weights
-        model.set_num_special_tokens(num_special_tokens)
-        return model
-
-
 class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
     """OpenAI GPT model ("Improving Language Understanding by Generative Pre-Training").

@@ -457,13 +393,13 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
             config.vocab_size - 1, ______________________
             config.vocab_size,
             ...                    -> special embeddings
-            config.vocab_size + config.n_special - 1] ______________________
+            config.vocab_size + n_special - 1] ______________________

-        where ``total_tokens_embeddings`` can be obtained as ``config.total_tokens_embeddings`` and is:
+        where ``total_tokens_embeddings`` is:

         ::

-            total_tokens_embeddings = config.vocab_size + config.n_special
+            total_tokens_embeddings = config.vocab_size + n_special

         You should use the associate indices to index the embeddings.

@@ -485,34 +421,16 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

-        self.tokens_embed = nn.Embedding(config.total_tokens_embeddings, config.n_embd)
+        self.tokens_embed = nn.Embedding(config.vocab_size, config.n_embd)
         self.positions_embed = nn.Embedding(config.n_positions, config.n_embd)
         self.drop = nn.Dropout(config.embd_pdrop)
         self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])

         self.apply(self.init_weights)

-    def set_num_special_tokens(self, num_special_tokens=None):
-        """
-        Update input embeddings with new embedding matrice if needed
-
-        Args:
-            num_special_tokens: Special tokens to be added to the embedding matrix
-        TODO Lysandre filled Args
-        """
-        if num_special_tokens is None or self.config.n_special == num_special_tokens:
-            return
-        # Update config
-        self.config.n_special = num_special_tokens
-        # Build new embeddings and initialize all new embeddings (in particular the special tokens)
-        old_embed = self.tokens_embed
-        self.tokens_embed = nn.Embedding(self.config.total_tokens_embeddings, self.config.n_embd)
-        self.tokens_embed.to(old_embed.weight.device)
-        self.init_weights(self.tokens_embed)
-        # Copy word embeddings from the previous weights
-        self.tokens_embed.weight.data[:self.config.vocab_size, :] = old_embed.weight.data[:self.config.vocab_size, :]
+    def _resize_token_embeddings(self, new_num_tokens):
+        self.tokens_embed = self._get_resized_embeddings(self.tokens_embed, new_num_tokens)
+        return self.tokens_embed

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.

@@ -657,24 +575,17 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
     def __init__(self, config):
         super(OpenAIGPTLMHeadModel, self).__init__(config)
         self.transformer = OpenAIGPTModel(config)
-        self.lm_head = OpenAIGPTLMHead(self.transformer.tokens_embed.weight, config)
+        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

         self.apply(self.init_weights)
+        self.tie_weights()

-    def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
-        """
-        Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings
-
-        Args:
-            num_special_tokens: Special tokens to be added to the embedding matrix
-            predict_special_tokens: if set to True, the model will try and predict the specified ``num_special_tokens``.
-                Defaults to True.
-        TODO Lysandre filled Args
+    def tie_weights(self):
+        """ Make sure we are sharing the input and output embeddings.
+            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
-        self.transformer.set_num_special_tokens(num_special_tokens)
-        self.lm_head.set_embeddings_weights(self.transformer.tokens_embed.weight, predict_special_tokens=predict_special_tokens)
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.tokens_embed)

     def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, head_mask=None):
         """

@@ -747,13 +658,13 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
             config.vocab_size - 1, ______________________
             config.vocab_size,
             ...                    -> special embeddings
-            config.vocab_size + config.n_special - 1] ______________________
+            config.vocab_size + n_special - 1] ______________________

-        where ``total_tokens_embeddings`` can be obtained as ``config.total_tokens_embeddings`` and is:
+        where ``total_tokens_embeddings`` is:

         ::

-            total_tokens_embeddings = config.vocab_size + config.n_special
+            total_tokens_embeddings = config.vocab_size + n_special

         You should use the associate indices to index the embeddings.

@@ -773,24 +684,18 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         super(OpenAIGPTDoubleHeadsModel, self).__init__(config)

         self.transformer = OpenAIGPTModel(config)
-        self.lm_head = OpenAIGPTLMHead(self.transformer.tokens_embed.weight, config)
+        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
         self.multiple_choice_head = SequenceSummary(config)

         self.apply(self.init_weights)
+        self.tie_weights()

-    def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
-        """ Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings.
-
-        Args:
-            num_special_tokens: Special tokens to be added to the embedding matrix
-            predict_special_tokens: if set to True, the model will try and predict the specified ``num_special_tokens``.
-                Defaults to True.
-        TODO Lysandre filled Args
+    def tie_weights(self):
+        """ Make sure we are sharing the input and output embeddings.
+            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
-        self.transformer.set_num_special_tokens(num_special_tokens)
-        self.lm_head.set_embeddings_weights(self.transformer.tokens_embed.weight, predict_special_tokens=predict_special_tokens)
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.tokens_embed)

     def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
                 position_ids=None, head_mask=None):
pytorch_transformers/modeling_transfo_xl.py  (+21 −3)

@@ -287,6 +287,14 @@ class TransfoXLConfig(PretrainedConfig):
             raise ValueError("First argument must be either a vocabulary size (int)"
                              "or the path to a pretrained model config file (str)")

+    @property
+    def vocab_size(self):
+        return self.n_token
+
+    @vocab_size.setter
+    def vocab_size(self, value):
+        self.n_token = value
+
     @property
     def hidden_size(self):
         return self.d_model

@@ -998,6 +1006,9 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
         self.apply(self.init_weights)

+    def _resize_token_embeddings(self, new_num_tokens):
+        return self.word_emb
+
     def backward_compatible(self):
         self.sample_softmax = -1

@@ -1273,13 +1284,20 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
         else:
             if self.config.tie_weight:
                 for i in range(len(self.crit.out_layers)):
-                    self.crit.out_layers[i].weight = self.transformer.word_emb.emb_layers[i].weight
+                    self._tie_or_clone_weights(self.crit.out_layers[i],
+                                               self.transformer.word_emb.emb_layers[i])
         if self.config.tie_projs:
             for i, tie_proj in enumerate(self.config.tie_projs):
                 if tie_proj and self.config.div_val == 1 and self.config.d_model != self.config.d_embed:
-                    self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[0]
+                    if self.config.torchscript:
+                        self.crit.out_projs[i] = nn.Parameter(self.transformer.word_emb.emb_projs[0].clone())
+                    else:
+                        self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[0]
                 elif tie_proj and self.config.div_val != 1:
-                    self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]
+                    if self.config.torchscript:
+                        self.crit.out_projs[i] = nn.Parameter(self.transformer.word_emb.emb_projs[i].clone())
+                    else:
+                        self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]

     def reset_length(self, tgt_len, ext_len, mem_len):
         self.transformer.reset_length(tgt_len, ext_len, mem_len)
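Transformer-XL is the one model in this commit where the tied tensors (`out_projs` / `emb_projs`) are bare `nn.Parameter`s rather than modules with a `.weight`, which is presumably why the clone-under-TorchScript branch is written out inline instead of going through `_tie_or_clone_weights`. A small sketch of the same rule applied directly to a parameter (shapes are arbitrary, the helper name is made up):

    import torch
    import torch.nn as nn

    def tie_or_clone_param(src, torchscript=False):
        # Share the Parameter object when possible; clone it for TorchScript export,
        # mirroring the inline branches added in TransfoXLLMHeadModel above.
        return nn.Parameter(src.clone()) if torchscript else src

    emb_proj = nn.Parameter(torch.randn(16, 8))   # stands in for word_emb.emb_projs[i]
    out_proj = tie_or_clone_param(emb_proj)       # tied: same object
    assert out_proj is emb_proj
    scripted = tie_or_clone_param(emb_proj, torchscript=True)
    assert scripted is not emb_proj and scripted.shape == emb_proj.shape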
pytorch_transformers/modeling_utils.py  (+70 −2)

@@ -151,6 +151,7 @@ class PreTrainedModel(nn.Module):
     pretrained_model_archive_map = {}
     load_tf_weights = lambda model, config, path: None
     base_model_prefix = ""
+    input_embeddings = None

     def __init__(self, config, *inputs, **kwargs):
         super(PreTrainedModel, self).__init__()

@@ -164,12 +165,79 @@ class PreTrainedModel(nn.Module):
         # Save config in model
         self.config = config

+    def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
+        """ Build a resized Embedding Module from a provided token Embedding Module.
+            Increasing the size will add newly initialized vectors at the end
+            Reducing the size will remove vectors from the end
+
+        Args:
+            new_num_tokens: (Optional) New number of tokens in the embedding matrix.
+                Increasing the size will add newly initialized vectors at the end
+                Reducing the size will remove vectors from the end
+                If not provided or None: return the provided token Embedding Module.
+        Return:
+            Pointer to the resized Embedding Module or the old Embedding Module if new_num_tokens is None
+        """
+        if new_num_tokens is None:
+            return old_embeddings
+
+        old_num_tokens, old_embedding_dim = old_embeddings.weight.size()
+        if old_num_tokens == new_num_tokens:
+            return old_embeddings
+
+        # Build new embeddings
+        new_embeddings = nn.Embedding(new_num_tokens, old_embedding_dim)
+        new_embeddings.to(old_embeddings.weight.device)
+
+        # initialize all new embeddings (in particular added tokens)
+        self.init_weights(new_embeddings)
+
+        # Copy word embeddings from the previous weights
+        num_tokens_to_copy = min(old_num_tokens, new_num_tokens)
+        new_embeddings.weight.data[:num_tokens_to_copy, :] = old_embeddings.weight.data[:num_tokens_to_copy, :]
+
+        return new_embeddings
+
+    def _tie_or_clone_weights(self, first_module, second_module):
+        """ Tie or clone module weights depending of weither we are using TorchScript or not
+        """
+        if self.config.torchscript:
+            first_module.weight = nn.Parameter(second_module.weight.clone())
+        else:
+            first_module.weight = second_module.weight
+
+    def resize_token_embeddings(self, new_num_tokens=None):
+        """ Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
+
+        Args:
+            new_num_tokens: (Optional) New number of tokens in the embedding matrix.
+                Increasing the size will add newly initialized vectors at the end
+                Reducing the size will remove vectors from the end
+                If not provided or None: does nothing.
+        Return:
+            Pointer to the input tokens Embedding Module of the model
+        """
+        base_model = getattr(self, self.base_model_prefix, self)  # get the base model if needed
+        model_embeds = base_model._resize_token_embeddings(new_num_tokens)
+        if new_num_tokens is None:
+            return model_embeds
+
+        # Update base model and current model config
+        self.config.vocab_size = new_num_tokens
+        base_model.vocab_size = new_num_tokens
+
+        # Tie weights again if needed
+        if hasattr(self, 'tie_weights'):
+            self.tie_weights()
+
+        return model_embeds
+
     def prune_heads(self, heads_to_prune):
         """ Prunes heads of the base model.
             heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
         """
-        model_to_prune = getattr(self, self.base_model_prefix, self)  # get the base model if needed
-        model_to_prune._prune_heads(heads_to_prune)
+        base_model = getattr(self, self.base_model_prefix, self)  # get the base model if needed
+        base_model._prune_heads(heads_to_prune)

     def save_pretrained(self, save_directory):
         """ Save a model with its configuration file to a directory, so that it
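The helper `_get_resized_embeddings` builds a fresh `nn.Embedding`, copies the overlapping rows, and leaves only the appended rows newly initialized, so existing token vectors survive a resize. A standalone sketch of that copy logic under the same assumptions (plain default initialization stands in for the model-specific `init_weights`):

    import torch
    import torch.nn as nn

    def get_resized_embeddings(old, new_num_tokens):
        old_num_tokens, dim = old.weight.size()
        if new_num_tokens is None or new_num_tokens == old_num_tokens:
            return old
        new = nn.Embedding(new_num_tokens, dim)      # appended rows get fresh initialization here
        new.to(old.weight.device)
        num_to_copy = min(old_num_tokens, new_num_tokens)
        new.weight.data[:num_to_copy, :] = old.weight.data[:num_to_copy, :]  # keep existing vectors
        return new

    old = nn.Embedding(10, 4)
    bigger = get_resized_embeddings(old, 12)
    assert torch.equal(bigger.weight.data[:10], old.weight.data)   # old rows preserved
    assert tuple(bigger.weight.size()) == (12, 4)

The library version additionally updates `config.vocab_size` and calls `tie_weights()` again, so a resized input matrix and the output projection stay shared.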
pytorch_transformers/modeling_xlm.py  (+11 −10)

@@ -104,7 +104,6 @@ class XLMConfig(PretrainedConfig):
     def __init__(self,
                  vocab_size_or_config_json_file=30145,
-                 n_special=0,
                  emb_dim=2048,
                  n_layers=12,
                  n_heads=16,

@@ -148,7 +147,6 @@ class XLMConfig(PretrainedConfig):
             self.__dict__[key] = value
         elif isinstance(vocab_size_or_config_json_file, int):
             self.n_words = vocab_size_or_config_json_file
-            self.n_special = n_special
             self.emb_dim = emb_dim
             self.n_layers = n_layers
             self.n_heads = n_heads

@@ -183,8 +181,12 @@ class XLMConfig(PretrainedConfig):
                              "or the path to a pretrained model config file (str)")

     @property
-    def total_tokens_embeddings(self):
-        return self.n_words + self.n_special
+    def vocab_size(self):
+        return self.n_words
+
+    @vocab_size.setter
+    def vocab_size(self, value):
+        self.n_words = value

     @property
     def hidden_size(self):

@@ -479,6 +481,10 @@ class XLMModel(XLMPreTrainedModel):
         self.apply(self.init_weights)

+    def _resize_token_embeddings(self, new_num_tokens):
+        self.embeddings = self._get_resized_embeddings(self.embeddings, new_num_tokens)
+        return self.embeddings
+
     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.
             heads_to_prune: dict of {layer_num: list of heads to prune in this layer}

@@ -718,8 +724,6 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     """
     def __init__(self, config):
         super(XLMWithLMHeadModel, self).__init__(config)
-        self.torchscript = config.torchscript
-
         self.transformer = XLMModel(config)
         self.pred_layer = XLMPredLayer(config)

@@ -729,10 +733,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     def tie_weights(self):
         """ Make sure we are sharing the embeddings
         """
-        if self.torchscript:
-            self.pred_layer.proj.weight = nn.Parameter(self.transformer.embeddings.weight.clone())
-        else:
-            self.pred_layer.proj.weight = self.transformer.embeddings.weight
+        self._tie_or_clone_weights(self.pred_layer.proj, self.transformer.embeddings)

     def forward(self, input_ids, lengths=None, positions=None, langs=None, token_type_ids=None,
                 attention_mask=None, cache=None, labels=None, head_mask=None):
pytorch_transformers/modeling_xlnet.py  (+14 −9)

@@ -312,6 +312,14 @@ class XLNetConfig(PretrainedConfig):
             raise ValueError("First argument must be either a vocabulary size (int)"
                              "or the path to a pretrained model config file (str)")

+    @property
+    def vocab_size(self):
+        return self.n_token
+
+    @vocab_size.setter
+    def vocab_size(self, value):
+        self.n_token = value
+
     @property
     def hidden_size(self):
         return self.d_model

@@ -654,9 +662,12 @@ class XLNetModel(XLNetPreTrainedModel):
         self.apply(self.init_weights)

+    def _resize_token_embeddings(self, new_num_tokens):
+        self.word_embedding = self._get_resized_embeddings(self.word_embedding, new_num_tokens)
+        return self.word_embedding
+
     def _prune_heads(self, heads_to_prune):
-        raise NotImplementedError
+        logger.info("Head pruning is not implemented for XLNet")
+        pass

     def create_mask(self, qlen, mlen):
         """

@@ -970,23 +981,17 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
         super(XLNetLMHeadModel, self).__init__(config)
         self.attn_type = config.attn_type
         self.same_length = config.same_length
-        self.torchscript = config.torchscript

         self.transformer = XLNetModel(config)
         self.lm_loss = nn.Linear(config.d_model, config.n_token, bias=True)
-        # Tie weights

         self.apply(self.init_weights)
         self.tie_weights()

     def tie_weights(self):
         """ Make sure we are sharing the embeddings
         """
-        if self.torchscript:
-            self.lm_loss.weight = nn.Parameter(self.transformer.word_embedding.weight.clone())
-        else:
-            self.lm_loss.weight = self.transformer.word_embedding.weight
+        self._tie_or_clone_weights(self.lm_loss, self.transformer.word_embedding)

     def forward(self, input_ids, token_type_ids=None, input_mask=None, attention_mask=None,
                 mems=None, perm_mask=None, target_mapping=None, inp_q=None,
pytorch_transformers/tests/modeling_bert_test.py  (+47 −39)

@@ -26,10 +26,15 @@ from pytorch_transformers import (BertConfig, BertModel, BertForMaskedLM,
                                   BertForTokenClassification, BertForMultipleChoice)
 from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP

-from .modeling_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor)
+from .modeling_common_test import (CommonTestCases, ConfigTester, ids_tensor)


-class BertModelTest(unittest.TestCase):
+class BertModelTest(CommonTestCases.CommonModelTester):
+
+    all_model_classes = (BertModel, BertForMaskedLM, BertForNextSentencePrediction,
+                         BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
+                         BertForTokenClassification)

     class BertModelTester(object):

         def __init__(self,

@@ -55,9 +60,6 @@ class BertModelTest(unittest.TestCase):
                      num_labels=3,
                      num_choices=4,
                      scope=None,
-                     all_model_classes=(BertModel, BertForMaskedLM, BertForNextSentencePrediction,
-                                        BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
-                                        BertForTokenClassification),
                     ):
             self.parent = parent
             self.batch_size = batch_size

@@ -81,7 +83,6 @@ class BertModelTest(unittest.TestCase):
             self.num_labels = num_labels
             self.num_choices = num_choices
             self.scope = scope
-            self.all_model_classes = all_model_classes

         def prepare_config_and_inputs(self):
             input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

@@ -253,52 +254,59 @@ class BertModelTest(unittest.TestCase):
             self.check_loss_output(result)

-        def create_and_check_bert_commons(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
+        def prepare_config_and_inputs_for_common(self):
+            config_and_inputs = self.prepare_config_and_inputs()
+            (config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
             inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask}
-            create_and_check_commons(self, config, inputs_dict)
+            return config, inputs_dict

-    def test_default(self):
-        self.run_tester(BertModelTest.BertModelTester(self))
+    def setUp(self):
+        self.model_tester = BertModelTest.BertModelTester(self)
+        self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)

     def test_config(self):
-        config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)
-        config_tester.run_common_tests()
+        self.config_tester.run_common_tests()

-    @pytest.mark.slow
-    def test_model_from_pretrained(self):
-        cache_dir = "/tmp/pytorch_transformers_test/"
-        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
-            model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)
-            shutil.rmtree(cache_dir)
-            self.assertIsNotNone(model)
+    def test_bert_model(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_model(*config_and_inputs)

-    def run_tester(self, tester):
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_model(*config_and_inputs)
+    def test_for_masked_lm(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_masked_lm(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
+    def test_for_multiple_choice(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
+    def test_for_next_sequence_prediction(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)
+    def test_for_pretraining(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_pretraining(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_pretraining(*config_and_inputs)
+    def test_for_question_answering(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_question_answering(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_question_answering(*config_and_inputs)
+    def test_for_sequence_classification(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)
+    def test_for_token_classification(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_token_classification(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_token_classification(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_commons(*config_and_inputs)
+    @pytest.mark.slow
+    def test_model_from_pretrained(self):
+        cache_dir = "/tmp/pytorch_transformers_test/"
+        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
+            model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)
+            shutil.rmtree(cache_dir)
+            self.assertIsNotNone(model)

 if __name__ == "__main__":
     unittest.main()
pytorch_transformers/tests/modeling_common_test.py (new file, 0 → 100644)
This diff is collapsed.
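Editorial aside: since the new shared test module above is collapsed in this view, here is a rough, hypothetical sketch of the pattern the per-model tests below now build on. A CommonModelTester base class (exposed through a CommonTestCases wrapper so the base itself is not collected as a test) reads config and inputs from each model tester's prepare_config_and_inputs_for_common() and runs the same generic checks over all_model_classes. Anything beyond the names visible in these diffs is an assumption, not the actual file contents.

# Sketch only -- not the real modeling_common_test.py; names that do not
# appear in the diffs on this page are illustrative assumptions.
import unittest


class CommonTestCases(object):

    class CommonModelTester(unittest.TestCase):
        all_model_classes = ()       # each subclass (BertModelTest, ...) overrides this
        test_pruning = True          # opt-out flags, e.g. TransfoXLModelTest sets these to False
        test_torchscript = True
        test_resize_embeddings = True

        def test_hidden_states_output(self):
            # self.model_tester is created in the subclass's setUp()
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
            config.output_hidden_states = True
            for model_class in self.all_model_classes:
                model = model_class(config)
                model.eval()
                outputs = model(**inputs_dict)
                hidden_states = outputs[-1]
                # one embedding output plus one hidden state per layer
                self.assertEqual(len(hidden_states),
                                 self.model_tester.num_hidden_layers + 1)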
pytorch_transformers/tests/modeling_gpt2_test.py
...
@@ -16,19 +16,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
 import unittest
-import json
-import random
-import shutil
 import pytest
-import torch
 
 from pytorch_transformers import (GPT2Config, GPT2Model,
                                   GPT2LMHeadModel, GPT2DoubleHeadsModel)
-from .modeling_tests_commons import (create_and_check_commons, ConfigTester, GPTModelTester)
+from .modeling_common_test import CommonTestCases, ConfigTester
 
 class GPT2ModelTest(unittest.TestCase):
...
@@ -37,14 +32,14 @@ class GPT2ModelTest(unittest.TestCase):
         config_tester.run_common_tests()
 
     def test_model(self):
-        model_tester = GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
+        model_tester = CommonTestCases.GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
                                       lm_head_model_class=GPT2LMHeadModel,
                                       double_head_model_class=GPT2DoubleHeadsModel)
         model_tester.run_common_tests(test_presents=True)
 
     @pytest.mark.slow
     def test_pretrained(self):
-        model_tester = GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
+        model_tester = CommonTestCases.GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
                                       lm_head_model_class=GPT2LMHeadModel,
                                       double_head_model_class=GPT2DoubleHeadsModel)
         model_tester.run_slow_tests()
...
pytorch_transformers/tests/modeling_openai_test.py
...
@@ -19,12 +19,11 @@ from __future__ import print_function
 import unittest
 import pytest
-import torch
 
 from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel,
                                   OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
-from .modeling_tests_commons import (create_and_check_commons, ConfigTester, GPTModelTester)
+from .modeling_common_test import CommonTestCases, ConfigTester
 
 class OpenAIModelTest(unittest.TestCase):
...
@@ -33,14 +32,14 @@ class OpenAIModelTest(unittest.TestCase):
         config_tester.run_common_tests()
 
     def test_model(self):
-        model_tester = GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
+        model_tester = CommonTestCases.GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
                                       lm_head_model_class=OpenAIGPTLMHeadModel,
                                       double_head_model_class=OpenAIGPTDoubleHeadsModel)
         model_tester.run_common_tests(test_presents=False)
 
     @pytest.mark.slow
     def test_pretrained(self):
-        model_tester = GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
+        model_tester = CommonTestCases.GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
                                       lm_head_model_class=OpenAIGPTLMHeadModel,
                                       double_head_model_class=OpenAIGPTDoubleHeadsModel)
         model_tester.run_slow_tests()
...
pytorch_transformers/tests/modeling_tests_commons.py (deleted, 100644 → 0)
# coding=utf-8
# Copyright 2019 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import os
import shutil
import json
import random

import torch


def _config_zero_init(config):
    configs_no_init = copy.deepcopy(config)
    for key in configs_no_init.__dict__.keys():
        if '_range' in key or '_std' in key:
            setattr(configs_no_init, key, 0.0)
    return configs_no_init


def _create_and_check_torchscript_output_attentions(tester, model_classes, config, inputs_dict):
    config.output_attentions = True
    _create_and_check_torchscript(tester, model_classes, config, inputs_dict)


def _create_and_check_torchscript_output_hidden_state(tester, model_classes, config, inputs_dict):
    config.output_hidden_states = True
    _create_and_check_torchscript(tester, model_classes, config, inputs_dict)


def _create_and_check_torchscript(tester, model_classes, config, inputs_dict):
    configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
    configs_no_init.torchscript = True
    for model_class in model_classes:
        model = model_class(config=configs_no_init)
        model.eval()
        inputs = inputs_dict['input_ids']  # Let's keep only input_ids

        try:
            torch.jit.trace(model, inputs)
        except RuntimeError:
            tester.parent.fail("Couldn't trace module.")

        try:
            traced_gpt2 = torch.jit.trace(model, inputs)
            torch.jit.save(traced_gpt2, "traced_model.pt")
        except RuntimeError:
            tester.parent.fail("Couldn't save module.")

        try:
            loaded_model = torch.jit.load("traced_model.pt")
            os.remove("traced_model.pt")
        except ValueError:
            tester.parent.fail("Couldn't load module.")

        model.eval()
        loaded_model.eval()

        model_params = model.parameters()
        loaded_model_params = loaded_model.parameters()

        models_equal = True
        for p1, p2 in zip(model_params, loaded_model_params):
            if p1.data.ne(p2.data).sum() > 0:
                models_equal = False

        tester.parent.assertTrue(models_equal)


def _create_and_check_initialization(tester, model_classes, config, inputs_dict):
    configs_no_init = _config_zero_init(config)
    for model_class in model_classes:
        model = model_class(config=configs_no_init)
        for name, param in model.named_parameters():
            if param.requires_grad:
                tester.parent.assertIn(param.data.mean().item(), [0.0, 1.0],
                                       msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))


def _create_and_check_for_headmasking(tester, model_classes, config, inputs_dict):
    configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
    for model_class in model_classes:
        config.output_attentions = True
        config.output_hidden_states = True
        model = model_class(config=configs_no_init)
        model.eval()

        # Prepare head_mask
        # Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
        head_mask = torch.ones(tester.num_hidden_layers, tester.num_attention_heads)
        head_mask[0, 0] = 0
        head_mask[-1, :-1] = 0
        head_mask.requires_grad_(requires_grad=True)
        inputs = inputs_dict.copy()
        inputs['head_mask'] = head_mask

        outputs = model(**inputs)

        # Test that we can get a gradient back for importance score computation
        output = sum(t.sum() for t in outputs[0])
        output = output.sum()
        output.backward()
        multihead_outputs = head_mask.grad

        attentions = outputs[-1]
        hidden_states = outputs[-2]

        # Remove Nan
        tester.parent.assertIsNotNone(multihead_outputs)
        tester.parent.assertEqual(len(multihead_outputs), tester.num_hidden_layers)
        tester.parent.assertAlmostEqual(attentions[0][..., 0, :, :].flatten().sum().item(), 0.0)
        tester.parent.assertNotEqual(attentions[0][..., -1, :, :].flatten().sum().item(), 0.0)
        tester.parent.assertNotEqual(attentions[1][..., 0, :, :].flatten().sum().item(), 0.0)
        tester.parent.assertAlmostEqual(attentions[-1][..., -2, :, :].flatten().sum().item(), 0.0)
        tester.parent.assertNotEqual(attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)


def _create_and_check_for_head_pruning(tester, model_classes, config, inputs_dict):
    for model_class in model_classes:
        config.output_attentions = True
        config.output_hidden_states = False
        model = model_class(config=config)
        model.eval()
        heads_to_prune = {0: list(range(1, tester.num_attention_heads)),
                          -1: [0]}
        model.prune_heads(heads_to_prune)
        outputs = model(**inputs_dict)

        attentions = outputs[-1]

        tester.parent.assertEqual(attentions[0].shape[-3], 1)
        tester.parent.assertEqual(attentions[1].shape[-3], tester.num_attention_heads)
        tester.parent.assertEqual(attentions[-1].shape[-3], tester.num_attention_heads - 1)


def _create_and_check_for_attentions(tester, model_classes, config, inputs_dict):
    for model_class in model_classes:
        config.output_attentions = True
        config.output_hidden_states = False
        model = model_class(config)
        model.eval()
        outputs = model(**inputs_dict)
        attentions = outputs[-1]
        tester.parent.assertEqual(model.config.output_attentions, True)
        tester.parent.assertEqual(model.config.output_hidden_states, False)
        tester.parent.assertEqual(len(attentions), tester.num_hidden_layers)
        tester.parent.assertListEqual(
            list(attentions[0].shape[-3:]),
            [tester.num_attention_heads,
             tester.seq_length,
             tester.key_len if hasattr(tester, 'key_len') else tester.seq_length])
        out_len = len(outputs)

        # Check attention is always last and order is fine
        config.output_attentions = True
        config.output_hidden_states = True
        model = model_class(config)
        model.eval()
        outputs = model(**inputs_dict)
        tester.parent.assertEqual(out_len + 1, len(outputs))
        tester.parent.assertEqual(model.config.output_attentions, True)
        tester.parent.assertEqual(model.config.output_hidden_states, True)

        attentions = outputs[-1]
        tester.parent.assertEqual(len(attentions), tester.num_hidden_layers)
        tester.parent.assertListEqual(
            list(attentions[0].shape[-3:]),
            [tester.num_attention_heads,
             tester.seq_length,
             tester.key_len if hasattr(tester, 'key_len') else tester.seq_length])


def _create_and_check_for_hidden_states(tester, model_classes, config, inputs_dict):
    for model_class in model_classes:
        config.output_hidden_states = True
        config.output_attentions = False
        model = model_class(config)
        model.eval()
        outputs = model(**inputs_dict)
        hidden_states = outputs[-1]
        tester.parent.assertEqual(model.config.output_attentions, False)
        tester.parent.assertEqual(model.config.output_hidden_states, True)
        tester.parent.assertEqual(len(hidden_states), tester.num_hidden_layers + 1)
        tester.parent.assertListEqual(
            list(hidden_states[0].shape[-2:]),
            [tester.seq_length, tester.hidden_size])


def create_and_check_commons(tester, config, inputs_dict, test_pruning=True, test_torchscript=True):
    _create_and_check_initialization(tester, tester.all_model_classes, config, inputs_dict)
    _create_and_check_for_attentions(tester, tester.all_model_classes, config, inputs_dict)
    _create_and_check_for_headmasking(tester, tester.all_model_classes, config, inputs_dict)
    _create_and_check_for_hidden_states(tester, tester.all_model_classes, config, inputs_dict)
    if test_torchscript:
        _create_and_check_torchscript(tester, tester.all_model_classes, config, inputs_dict)
        _create_and_check_torchscript_output_attentions(tester, tester.all_model_classes, config, inputs_dict)
        _create_and_check_torchscript_output_hidden_state(tester, tester.all_model_classes, config, inputs_dict)
    if test_pruning:
        _create_and_check_for_head_pruning(tester, tester.all_model_classes, config, inputs_dict)


def ids_tensor(shape, vocab_size, rng=None, name=None):
    """Creates a random int32 tensor of the shape within the vocab size."""
    if rng is None:
        rng = random.Random()

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.randint(0, vocab_size - 1))

    return torch.tensor(data=values, dtype=torch.long).view(shape).contiguous()
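Aside (not part of the deleted file): ids_tensor simply fills a torch.long tensor of the requested shape with random ids in [0, vocab_size); a minimal usage sketch with illustrative values:

# Illustrative use of the helper above, e.g. a fake batch of 2 sequences of
# length 5 drawn from a 99-token vocabulary.
fake_input_ids = ids_tensor([2, 5], vocab_size=99)
assert fake_input_ids.shape == (2, 5)
assert 0 <= int(fake_input_ids.min()) and int(fake_input_ids.max()) < 99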
class ConfigTester(object):
    def __init__(self, parent, config_class=None, **kwargs):
        self.parent = parent
        self.config_class = config_class
        self.inputs_dict = kwargs

    def create_and_test_config_common_properties(self):
        config = self.config_class(**self.inputs_dict)
        self.parent.assertTrue(hasattr(config, 'hidden_size'))
        self.parent.assertTrue(hasattr(config, 'num_attention_heads'))
        self.parent.assertTrue(hasattr(config, 'num_hidden_layers'))

    def create_and_test_config_to_json_string(self):
        config = self.config_class(**self.inputs_dict)
        obj = json.loads(config.to_json_string())
        for key, value in self.inputs_dict.items():
            self.parent.assertEqual(obj[key], value)

    def create_and_test_config_to_json_file(self):
        config_first = self.config_class(**self.inputs_dict)
        json_file_path = "/tmp/config.json"
        config_first.to_json_file(json_file_path)
        config_second = self.config_class.from_json_file(json_file_path)
        os.remove(json_file_path)
        self.parent.assertEqual(config_second.to_dict(), config_first.to_dict())

    def run_common_tests(self):
        self.create_and_test_config_common_properties()
        self.create_and_test_config_to_json_string()
        self.create_and_test_config_to_json_file()


class GPTModelTester(object):
    def __init__(self,
                 parent,
                 batch_size=13,
                 seq_length=7,
                 is_training=True,
                 use_position_ids=True,
                 use_token_type_ids=True,
                 use_labels=True,
                 vocab_size=99,
                 n_special=1,
                 n_positions=33,
                 hidden_size=32,
                 num_hidden_layers=5,
                 num_attention_heads=4,
                 n_choices=3,
                 type_sequence_label_size=2,
                 initializer_range=0.02,
                 num_labels=3,
                 scope=None,
                 config_class=None,
                 base_model_class=None,
                 lm_head_model_class=None,
                 double_head_model_class=None,
                 ):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_position_ids = use_position_ids
        self.use_token_type_ids = use_token_type_ids
        self.use_labels = use_labels
        self.vocab_size = vocab_size
        self.n_special = n_special
        self.n_positions = n_positions
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.n_choices = n_choices
        self.type_sequence_label_size = type_sequence_label_size
        self.initializer_range = initializer_range
        self.num_labels = num_labels
        self.scope = scope
        self.config_class = config_class
        self.base_model_class = base_model_class
        self.lm_head_model_class = lm_head_model_class
        self.double_head_model_class = double_head_model_class
        self.all_model_classes = (base_model_class, lm_head_model_class, double_head_model_class)

    def prepare_config_and_inputs(self):
        total_num_tokens = self.vocab_size + self.n_special
        input_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_num_tokens)

        position_ids = None
        if self.use_position_ids:
            position_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.n_positions)

        token_type_ids = None
        if self.use_token_type_ids:
            total_voc = self.vocab_size
            token_type_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_voc)

        mc_labels = None
        lm_labels = None
        mc_token_ids = None
        if self.use_labels:
            mc_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            lm_labels = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.num_labels)
            mc_token_ids = ids_tensor([self.batch_size, self.n_choices], self.seq_length)

        config = self.config_class(
            vocab_size_or_config_json_file=self.vocab_size,
            n_special=self.n_special,
            n_positions=self.n_positions,
            n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers,
            n_head=self.num_attention_heads,
            initializer_range=self.initializer_range)

        return (config, input_ids, token_type_ids, position_ids,
                mc_labels, lm_labels, mc_token_ids)

    def create_and_check_base_model(self, config, input_ids, token_type_ids, position_ids,
                                    mc_labels, lm_labels, mc_token_ids):
        model = self.base_model_class(config)
        model.eval()
        outputs = model(input_ids, position_ids, token_type_ids)
        outputs = model(input_ids, position_ids)
        outputs = model(input_ids)
        hidden_state = outputs[0]
        self.parent.assertListEqual(
            list(hidden_state.size()),
            [self.batch_size, self.n_choices, self.seq_length, self.hidden_size])

    def create_and_check_lm_head(self, config, input_ids, token_type_ids, position_ids,
                                 mc_labels, lm_labels, mc_token_ids):
        model = self.lm_head_model_class(config)
        model.eval()
        outputs = model(input_ids, position_ids, token_type_ids, lm_labels)
        loss, lm_logits = outputs[:2]

        total_voc = self.n_special + self.vocab_size
        self.parent.assertListEqual(
            list(lm_logits.size()),
            [self.batch_size, self.n_choices, self.seq_length, total_voc])
        self.parent.assertListEqual(
            list(loss.size()),
            [])

    def create_and_check_presents(self, config, input_ids, token_type_ids, position_ids,
                                  mc_labels, lm_labels, mc_token_ids):
        for model_class in self.all_model_classes:
            model = model_class(config)
            model.eval()
            outputs = model(input_ids)
            presents = outputs[-1]
            self.parent.assertEqual(self.num_hidden_layers, len(presents))
            self.parent.assertListEqual(
                list(presents[0].size()),
                [2, self.batch_size * self.n_choices, self.num_attention_heads,
                 self.seq_length, self.hidden_size // self.num_attention_heads])

    def create_and_check_double_heads(self, config, input_ids, token_type_ids, position_ids,
                                      mc_labels, lm_labels, mc_token_ids):
        model = self.double_head_model_class(config)
        model.eval()
        outputs = model(input_ids, mc_token_ids,
                        lm_labels=lm_labels, mc_labels=mc_labels,
                        token_type_ids=token_type_ids, position_ids=position_ids)
        lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
        loss = [lm_loss, mc_loss]

        total_voc = self.n_special + self.vocab_size
        self.parent.assertListEqual(
            list(lm_logits.size()),
            [self.batch_size, self.n_choices, self.seq_length, total_voc])
        self.parent.assertListEqual(
            list(mc_logits.size()),
            [self.batch_size, self.n_choices])
        self.parent.assertListEqual(
            [list(l.size()) for l in loss],
            [[], []])

    def create_and_check_model_from_pretrained(self):
        cache_dir = "/tmp/pytorch_transformers_test/"
        for model_name in list(self.base_model_class.pretrained_model_archive_map.keys())[:1]:
            model = self.base_model_class.from_pretrained(model_name, cache_dir=cache_dir)
            shutil.rmtree(cache_dir)
            self.parent.assertIsNotNone(model)

    def create_and_check_commons(self, config, input_ids, token_type_ids, position_ids,
                                 mc_labels, lm_labels, mc_token_ids):
        inputs_dict = {'input_ids': input_ids}
        create_and_check_commons(self, config, inputs_dict)

    def run_common_tests(self, test_presents=False):
        config_and_inputs = self.prepare_config_and_inputs()
        self.create_and_check_base_model(*config_and_inputs)

        config_and_inputs = self.prepare_config_and_inputs()
        self.create_and_check_lm_head(*config_and_inputs)

        config_and_inputs = self.prepare_config_and_inputs()
        self.create_and_check_double_heads(*config_and_inputs)

        if test_presents:
            config_and_inputs = self.prepare_config_and_inputs()
            self.create_and_check_presents(*config_and_inputs)

        config_and_inputs = self.prepare_config_and_inputs()
        self.create_and_check_commons(*config_and_inputs)

    def run_slow_tests(self):
        config_and_inputs = self.prepare_config_and_inputs()
        self.create_and_check_model_from_pretrained(*config_and_inputs)
pytorch_transformers/tests/modeling_transfo_xl_test.py
...
@@ -28,9 +28,15 @@ import torch
 from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
 from pytorch_transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
 
-from .modeling_tests_commons import ConfigTester, create_and_check_commons, ids_tensor
+from .modeling_common_test import ConfigTester, CommonTestCases, ids_tensor
 
-class TransfoXLModelTest(unittest.TestCase):
+class TransfoXLModelTest(CommonTestCases.CommonModelTester):
+
+    all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel)
+    test_pruning = False
+    test_torchscript = False
+    test_resize_embeddings = False
 
     class TransfoXLModelTester(object):
 
         def __init__(self,
...
@@ -52,7 +58,6 @@ class TransfoXLModelTest(unittest.TestCase):
                      num_hidden_layers=5,
                      scope=None,
                      seed=1,
-                     all_model_classes=(TransfoXLModel, TransfoXLLMHeadModel),
                      ):
             self.parent = parent
             self.batch_size = batch_size
...
@@ -73,7 +78,6 @@ class TransfoXLModelTest(unittest.TestCase):
             self.num_hidden_layers = num_hidden_layers
             self.scope = scope
             self.seed = seed
-            self.all_model_classes = all_model_classes
 
         def prepare_config_and_inputs(self):
             input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...
@@ -171,16 +175,31 @@ class TransfoXLModelTest(unittest.TestCase):
                 list(list(mem.size()) for mem in result["mems_2"]),
                 [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers)
 
-        def create_and_check_transfo_xl_commons(self, config, input_ids_1, input_ids_2, lm_labels):
+        def prepare_config_and_inputs_for_common(self):
+            config_and_inputs = self.prepare_config_and_inputs()
+            (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
             inputs_dict = {'input_ids': input_ids_1}
-            create_and_check_commons(self, config, inputs_dict, test_pruning=False, test_torchscript=False)
+            return config, inputs_dict
 
-    def test_default(self):
-        self.run_tester(TransfoXLModelTest.TransfoXLModelTester(self))
+    def setUp(self):
+        self.model_tester = TransfoXLModelTest.TransfoXLModelTester(self)
+        self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
 
     def test_config(self):
-        config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
-        config_tester.run_common_tests()
+        self.config_tester.run_common_tests()
+
+    def test_transfo_xl_model(self):
+        self.model_tester.set_seed()
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        output_result = self.model_tester.create_transfo_xl_model(*config_and_inputs)
+        self.model_tester.check_transfo_xl_model_output(output_result)
+
+    def test_transfo_xl_lm_head(self):
+        self.model_tester.set_seed()
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
+        self.model_tester.check_transfo_xl_lm_head_output(output_result)
 
     @pytest.mark.slow
     def test_model_from_pretrained(self):
...
@@ -190,23 +209,6 @@ class TransfoXLModelTest(unittest.TestCase):
             shutil.rmtree(cache_dir)
             self.assertIsNotNone(model)
 
-    def run_tester(self, tester):
-        config_and_inputs = tester.prepare_config_and_inputs()
-
-        tester.set_seed()
-        config_and_inputs = tester.prepare_config_and_inputs()
-        output_result = tester.create_transfo_xl_model(*config_and_inputs)
-        tester.check_transfo_xl_model_output(output_result)
-
-        tester.set_seed()
-        config_and_inputs = tester.prepare_config_and_inputs()
-        output_result = tester.create_transfo_xl_lm_head(*config_and_inputs)
-        tester.check_transfo_xl_lm_head_output(output_result)
-
-        tester.set_seed()
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_transfo_xl_commons(*config_and_inputs)
 
 if __name__ == "__main__":
     unittest.main()
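Aside: note the new test_resize_embeddings = False flag in the Transformer-XL test above; Transformer-XL uses adaptive embeddings, so it opts out of the common input-embedding-resizing check that this pull request's utilities in modeling_utils.py enable. A rough, hypothetical sketch of what such a check could look like, assuming a resize_token_embeddings(new_num_tokens) entry point (the real check lives in the collapsed modeling_common_test.py, so the body below is an assumption, not that file's contents):

# Hypothetical resize-embeddings check, sketched against the tester objects
# shown in this diff; not the actual common test implementation.
def check_resize_embeddings(test_case, model_class, config, extra_tokens=3):
    model = model_class(config)
    original_vocab_size = config.vocab_size
    # Grow the input embedding matrix and keep the config bookkeeping in sync
    embeddings = model.resize_token_embeddings(original_vocab_size + extra_tokens)
    test_case.assertEqual(model.config.vocab_size, original_vocab_size + extra_tokens)
    test_case.assertEqual(embeddings.weight.shape[0], original_vocab_size + extra_tokens)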
pytorch_transformers/tests/modeling_utils_test.py (deleted, 100644 → 0)
# coding=utf-8
# Copyright 2018 HuggingFace Inc..
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import unittest
import logging

from pytorch_transformers import PretrainedConfig, PreTrainedModel
from pytorch_transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP


class ModelUtilsTest(unittest.TestCase):

    def test_model_from_pretrained(self):
        logging.basicConfig(level=logging.INFO)
        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            config = BertConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, PretrainedConfig)

            model = BertModel.from_pretrained(model_name)
            model, loading_info = BertModel.from_pretrained(model_name, output_loading_info=True)
            self.assertIsNotNone(model)
            self.assertIsInstance(model, PreTrainedModel)
            for value in loading_info.values():
                self.assertEqual(len(value), 0)

            config = BertConfig.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
            model = BertModel.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
            self.assertEqual(model.config.output_attentions, True)
            self.assertEqual(model.config.output_hidden_states, True)
            self.assertEqual(model.config, config)


if __name__ == "__main__":
    unittest.main()
pytorch_transformers/tests/modeling_xlm_test.py
...
@@ -23,10 +23,15 @@ import pytest
 from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel,
                                   XLMForQuestionAnswering, XLMForSequenceClassification)
 from pytorch_transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP
 
-from .modeling_tests_commons import (create_and_check_commons, ConfigTester, ids_tensor)
+from .modeling_common_test import (CommonTestCases, ConfigTester, ids_tensor)
 
-class XLMModelTest(unittest.TestCase):
+class XLMModelTest(CommonTestCases.CommonModelTester):
+
+    all_model_classes = (XLMModel, XLMWithLMHeadModel,
+                         XLMForQuestionAnswering, XLMForSequenceClassification)
+                        # , XLMForSequenceClassification, XLMForTokenClassification),
 
     class XLMModelTester(object):
 
         def __init__(self,
...
@@ -58,8 +63,6 @@ class XLMModelTest(unittest.TestCase):
                      summary_type="last",
                      use_proj=True,
                      scope=None,
-                     all_model_classes=(XLMModel, XLMWithLMHeadModel,
-                                        XLMForQuestionAnswering, XLMForSequenceClassification),
-                                        # , XLMForSequenceClassification, XLMForTokenClassification),
                      ):
             self.parent = parent
             self.batch_size = batch_size
...
@@ -90,7 +93,6 @@ class XLMModelTest(unittest.TestCase):
             self.num_labels = num_labels
             self.num_choices = num_choices
             self.scope = scope
-            self.all_model_classes = all_model_classes
 
         def prepare_config_and_inputs(self):
             input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...
@@ -237,28 +239,23 @@ class XLMModelTest(unittest.TestCase):
                 [self.batch_size, self.type_sequence_label_size])
 
-        def create_and_check_xlm_commons(self, config, input_ids, token_type_ids, input_lengths,
-                                         sequence_labels, token_labels, is_impossible_labels, input_mask):
+        def prepare_config_and_inputs_for_common(self):
+            config_and_inputs = self.prepare_config_and_inputs()
+            (config, input_ids, token_type_ids, input_lengths,
+             sequence_labels, token_labels, is_impossible_labels, input_mask) = config_and_inputs
             inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'lengths': input_lengths}
-            create_and_check_commons(self, config, inputs_dict)
+            return config, inputs_dict
 
-    def test_default(self):
-        self.run_tester(XLMModelTest.XLMModelTester(self))
+    def setUp(self):
+        self.model_tester = XLMModelTest.XLMModelTester(self)
+        self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
 
     def test_config(self):
-        config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
-        config_tester.run_common_tests()
+        self.config_tester.run_common_tests()
 
-    @pytest.mark.slow
-    def test_model_from_pretrained(self):
-        cache_dir = "/tmp/pytorch_transformers_test/"
-        for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
-            model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir)
-            shutil.rmtree(cache_dir)
-            self.assertIsNotNone(model)
-
-    def run_tester(self, tester):
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_xlm_model(*config_and_inputs)
+    def test_xlm_model(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_xlm_model(*config_and_inputs)
 
         # config_and_inputs = tester.prepare_config_and_inputs()
         # tester.create_and_check_xlm_for_masked_lm(*config_and_inputs)
...
@@ -275,8 +272,14 @@ class XLMModelTest(unittest.TestCase):
         # config_and_inputs = tester.prepare_config_and_inputs()
         # tester.create_and_check_xlm_for_token_classification(*config_and_inputs)
 
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_xlm_commons(*config_and_inputs)
+    @pytest.mark.slow
+    def test_model_from_pretrained(self):
+        cache_dir = "/tmp/pytorch_transformers_test/"
+        for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
+            model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir)
+            shutil.rmtree(cache_dir)
+            self.assertIsNotNone(model)
 
 if __name__ == "__main__":
     unittest.main()
pytorch_transformers/tests/modeling_xlnet_test.py
...
@@ -28,9 +28,14 @@ import torch
 from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel,
                                   XLNetForSequenceClassification, XLNetForQuestionAnswering)
 from pytorch_transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
 
-from .modeling_tests_commons import ConfigTester, create_and_check_commons, ids_tensor
+from .modeling_common_test import ConfigTester, CommonTestCases, ids_tensor
 
-class XLNetModelTest(unittest.TestCase):
+class XLNetModelTest(CommonTestCases.CommonModelTester):
+
+    all_model_classes = (XLNetModel, XLNetLMHeadModel,
+                         XLNetForSequenceClassification, XLNetForQuestionAnswering)
+    test_pruning = False
 
     class XLNetModelTester(object):
 
         def __init__(self,
...
@@ -56,8 +61,6 @@ class XLNetModelTest(unittest.TestCase):
                      initializer_range=0.05,
                      seed=1,
                      type_vocab_size=2,
-                     all_model_classes=(XLNetModel, XLNetLMHeadModel,
-                                        XLNetForSequenceClassification, XLNetForQuestionAnswering),
                      ):
             self.parent = parent
             self.batch_size = batch_size
...
@@ -82,7 +85,6 @@ class XLNetModelTest(unittest.TestCase):
             self.seed = seed
             self.type_vocab_size = type_vocab_size
             self.type_sequence_label_size = type_sequence_label_size
-            self.all_model_classes = all_model_classes
 
         def prepare_config_and_inputs(self):
             input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...
@@ -264,17 +266,41 @@ class XLNetModelTest(unittest.TestCase):
                 list(list(mem.size()) for mem in result["mems_1"]),
                 [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers)
 
-        def create_and_check_xlnet_commons(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
-                                           target_mapping, inp_q, segment_ids, lm_labels, sequence_labels, is_impossible_labels):
+        def prepare_config_and_inputs_for_common(self):
+            config_and_inputs = self.prepare_config_and_inputs()
+            (config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
+             target_mapping, inp_q, segment_ids, lm_labels,
+             sequence_labels, is_impossible_labels) = config_and_inputs
             inputs_dict = {'input_ids': input_ids_1}
-            create_and_check_commons(self, config, inputs_dict, test_pruning=False)
+            return config, inputs_dict
 
-    def test_default(self):
-        self.run_tester(XLNetModelTest.XLNetModelTester(self))
+    def setUp(self):
+        self.model_tester = XLNetModelTest.XLNetModelTester(self)
+        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
 
     def test_config(self):
-        config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
-        config_tester.run_common_tests()
+        self.config_tester.run_common_tests()
+
+    def test_xlnet_base_model(self):
+        self.model_tester.set_seed()
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)
+
+    def test_xlnet_lm_head(self):
+        self.model_tester.set_seed()
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)
+
+    def test_xlnet_sequence_classif(self):
+        self.model_tester.set_seed()
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)
+
+    def test_xlnet_qa(self):
+        self.model_tester.set_seed()
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)
 
     @pytest.mark.slow
     def test_model_from_pretrained(self):
...
@@ -284,27 +310,6 @@ class XLNetModelTest(unittest.TestCase):
             shutil.rmtree(cache_dir)
             self.assertIsNotNone(model)
 
-    def run_tester(self, tester):
-        tester.set_seed()
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_xlnet_base_model(*config_and_inputs)
-
-        tester.set_seed()
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_xlnet_lm_head(*config_and_inputs)
-
-        tester.set_seed()
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)
-
-        tester.set_seed()
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_xlnet_qa(*config_and_inputs)
-
-        tester.set_seed()
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_xlnet_commons(*config_and_inputs)
 
 if __name__ == "__main__":
     unittest.main()