transformers: commit 2918b7d2
Authored Jul 12, 2019 by thomwolf
Commit message: updating tests
Parent: 3fbceed8
Showing 14 changed files with 701 additions and 625 deletions (+701 -625)
Files changed:

  pytorch_transformers/modeling_bert.py                    +5    -10
  pytorch_transformers/modeling_gpt2.py                    +5    -10
  pytorch_transformers/modeling_openai.py                  +5    -10
  pytorch_transformers/modeling_transfo_xl.py              +15   -4
  pytorch_transformers/modeling_utils.py                   +39   -8
  pytorch_transformers/modeling_xlm.py                     +6    -4
  pytorch_transformers/modeling_xlnet.py                   +7    -6
  pytorch_transformers/tests/modeling_bert_test.py         +47   -39
  pytorch_transformers/tests/modeling_common_test.py       +471  -437
  pytorch_transformers/tests/modeling_gpt2_test.py         +4    -9
  pytorch_transformers/tests/modeling_openai_test.py       +3    -4
  pytorch_transformers/tests/modeling_transfo_xl_test.py   +29   -27
  pytorch_transformers/tests/modeling_xlm_test.py          +27   -24
  pytorch_transformers/tests/modeling_xlnet_test.py        +38   -33
pytorch_transformers/modeling_bert.py

@@ -617,6 +617,7 @@ class BertModel(BertPreTrainedModel):
         old_embeddings = self.embeddings.word_embeddings
         new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
         self.embeddings.word_embeddings = new_embeddings
+        return self.embeddings.word_embeddings

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.

@@ -758,11 +759,8 @@ class BertForPreTraining(BertPreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
             Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        input_embeddings = self.bert.embeddings.word_embeddings.weight
-        if self.config.torchscript:
-            self.cls.predictions.decoder.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.cls.predictions.decoder.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.cls.predictions.decoder,
+                                   self.bert.embeddings.word_embeddings)

     def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None,
                 next_sentence_label=None, head_mask=None):

@@ -864,11 +862,8 @@ class BertForMaskedLM(BertPreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
             Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        input_embeddings = self.bert.embeddings.word_embeddings.weight
-        if self.config.torchscript:
-            self.cls.predictions.decoder.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.cls.predictions.decoder.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.cls.predictions.decoder,
+                                   self.bert.embeddings.word_embeddings)

     def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, head_mask=None):
         """
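A minimal usage sketch of the behaviour introduced here together with the resize_token_embeddings() changes in modeling_utils.py further down: the resize helpers now hand back the input embedding module. This is not part of the diff; the checkpoint name and the printed shape are illustrative assumptions.

    from pytorch_transformers import BertForMaskedLM

    model = BertForMaskedLM.from_pretrained('bert-base-uncased')

    # Passing None is now a no-op that simply returns the current input embeddings.
    embeddings = model.resize_token_embeddings(None)
    print(embeddings.weight.shape)   # e.g. torch.Size([30522, 768])

    # Growing the vocabulary returns the new, larger embedding module; the output
    # layer is re-tied to it because resize_token_embeddings() calls tie_weights().
    old_size = embeddings.weight.shape[0]
    embeddings = model.resize_token_embeddings(old_size + 8)
    assert embeddings.weight.shape[0] == old_size + 8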
pytorch_transformers/modeling_gpt2.py

@@ -414,6 +414,7 @@ class GPT2Model(GPT2PreTrainedModel):
     def _resize_token_embeddings(self, new_num_tokens):
         self.wte = self._get_resized_embeddings(self.wte, new_num_tokens)
+        return self.wte

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.

@@ -562,11 +563,8 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
             Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        input_embeddings = self.transformer.wte.weight
-        if self.config.torchscript:
-            self.lm_head.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.lm_head.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.wte)

     def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, past=None, head_mask=None):
         """

@@ -658,11 +656,8 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
             Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        input_embeddings = self.transformer.wte.weight
-        if self.config.torchscript:
-            self.lm_head.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.lm_head.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.wte)

     def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
                 position_ids=None, past=None, head_mask=None):
pytorch_transformers/modeling_openai.py

@@ -430,6 +430,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
     def _resize_token_embeddings(self, new_num_tokens):
         self.tokens_embed = self._get_resized_embeddings(self.tokens_embed, new_num_tokens)
+        return self.tokens_embed

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.

@@ -583,11 +584,8 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
             Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        input_embeddings = self.transformer.tokens_embed.weight
-        if self.config.torchscript:
-            self.lm_head.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.lm_head.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.tokens_embed)

     def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, head_mask=None):
         """

@@ -696,11 +694,8 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
             Export to TorchScript can't handle parameter sharing so we are cloning them instead.
         """
-        input_embeddings = self.transformer.tokens_embed.weight
-        if self.config.torchscript:
-            self.lm_head.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.lm_head.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.tokens_embed)

     def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
                 position_ids=None, head_mask=None):
pytorch_transformers/modeling_transfo_xl.py

@@ -291,6 +291,10 @@ class TransfoXLConfig(PretrainedConfig):
     def vocab_size(self):
         return self.n_token

+    @vocab_size.setter
+    def vocab_size(self, value):
+        self.n_token = value
+
     @property
     def hidden_size(self):
         return self.d_model

@@ -1003,7 +1007,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
         self.apply(self.init_weights)

     def _resize_token_embeddings(self, new_num_tokens):
-        raise NotImplementedError
+        return self.word_emb

     def backward_compatible(self):
         self.sample_softmax = -1

@@ -1280,13 +1284,20 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
         else:
             if self.config.tie_weight:
                 for i in range(len(self.crit.out_layers)):
-                    self.crit.out_layers[i].weight = self.transformer.word_emb.emb_layers[i].weight
+                    self._tie_or_clone_weights(self.crit.out_layers[i],
+                                               self.transformer.word_emb.emb_layers[i])
             if self.config.tie_projs:
                 for i, tie_proj in enumerate(self.config.tie_projs):
                     if tie_proj and self.config.div_val == 1 and self.config.d_model != self.config.d_embed:
-                        self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[0]
+                        if self.config.torchscript:
+                            self.crit.out_projs[i] = nn.Parameter(self.transformer.word_emb.emb_projs[0].clone())
+                        else:
+                            self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[0]
                     elif tie_proj and self.config.div_val != 1:
-                        self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]
+                        if self.config.torchscript:
+                            self.crit.out_projs[i] = nn.Parameter(self.transformer.word_emb.emb_projs[i].clone())
+                        else:
+                            self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]

     def reset_length(self, tgt_len, ext_len, mem_len):
         self.transformer.reset_length(tgt_len, ext_len, mem_len)
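The @vocab_size.setter added here (and to XLMConfig and XLNetConfig below) exists so that PreTrainedModel.resize_token_embeddings() can assign config.vocab_size and have the value land on the model-specific attribute (n_token here, n_words for XLM). A stand-alone sketch of the pattern on a toy config class, not the library code:

    class ToyConfig(object):
        def __init__(self, n_token=1000):
            self.n_token = n_token

        @property
        def vocab_size(self):
            # alias for the model-specific vocabulary attribute
            return self.n_token

        @vocab_size.setter
        def vocab_size(self, value):
            # Without a setter the property is read-only, and
            # `config.vocab_size = new_num_tokens` would raise AttributeError.
            self.n_token = value

    config = ToyConfig()
    config.vocab_size = 1010
    assert config.n_token == 1010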
pytorch_transformers/modeling_utils.py

@@ -165,9 +165,27 @@ class PreTrainedModel(nn.Module):
         # Save config in model
         self.config = config

-    def _get_resized_embeddings(self, old_embeddings, new_num_tokens):
-        # Build new embeddings
+    def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
+        """ Build a resized Embedding Module from a provided token Embedding Module.
+            Increasing the size will add newly initialized vectors at the end
+            Reducing the size will remove vectors from the end
+
+            Args:
+                new_num_tokens: (Optional) New number of tokens in the embedding matrix.
+                    Increasing the size will add newly initialized vectors at the end
+                    Reducing the size will remove vectors from the end
+                    If not provided or None: return the provided token Embedding Module.
+            Return:
+                Pointer to the resized Embedding Module or the old Embedding Module if new_num_tokens is None
+        """
+        if new_num_tokens is None:
+            return old_embeddings
+
         old_num_tokens, old_embedding_dim = old_embeddings.weight.size()
+        if old_num_tokens == new_num_tokens:
+            return old_embeddings
+
+        # Build new embeddings
         new_embeddings = nn.Embedding(new_num_tokens, old_embedding_dim)
         new_embeddings.to(old_embeddings.weight.device)

@@ -180,18 +198,29 @@ class PreTrainedModel(nn.Module):
         return new_embeddings

+    def _tie_or_clone_weights(self, first_module, second_module):
+        """ Tie or clone module weights depending of weither we are using TorchScript or not
+        """
+        if self.config.torchscript:
+            first_module.weight = nn.Parameter(second_module.weight.clone())
+        else:
+            first_module.weight = second_module.weight
+
-    def resize_token_embeddings(self, new_num_tokens):
-        """ Resize input token embeddings matrix.
+    def resize_token_embeddings(self, new_num_tokens=None):
+        """ Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
+
             Args:
-                new_num_tokens: New number of tokens in the embedding matrix.
+                new_num_tokens: (Optional) New number of tokens in the embedding matrix.
                     Increasing the size will add newly initialized vectors at the end
                     Reducing the size will remove vectors from the end
+                    If not provided or None: does nothing.
+            Return:
+                Pointer to the input tokens Embedding Module of the model
         """
-        if new_num_tokens == self.config.vocab_size:
-            return
         base_model = getattr(self, self.base_model_prefix, self)  # get the base model if needed
-        base_model._resize_token_embeddings(new_num_tokens)
+        model_embeds = base_model._resize_token_embeddings(new_num_tokens)
+        if new_num_tokens is None:
+            return model_embeds

         # Update base model and current model config
         self.config.vocab_size = new_num_tokens

@@ -201,6 +230,8 @@ class PreTrainedModel(nn.Module):
         if hasattr(self, 'tie_weights'):
             self.tie_weights()

+        return model_embeds
+
     def prune_heads(self, heads_to_prune):
         """ Prunes heads of the base model.
             heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
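What the new _tie_or_clone_weights() helper does, sketched on plain torch.nn layers (toy modules standing in for a model's input embedding and output projection; this is not library code):

    import torch
    import torch.nn as nn

    embedding = nn.Embedding(10, 4)          # weight shape (10, 4)
    decoder = nn.Linear(4, 10, bias=False)   # weight shape (10, 4)

    # Tied branch (config.torchscript is False): both modules share one Parameter,
    # so any update to the embedding is seen by the decoder and vice versa.
    decoder.weight = embedding.weight
    assert decoder.weight is embedding.weight

    # Cloned branch (config.torchscript is True): TorchScript export cannot handle
    # shared parameters, so the weight is copied into an independent Parameter.
    decoder.weight = nn.Parameter(embedding.weight.clone())
    assert decoder.weight is not embedding.weight
    assert torch.equal(decoder.weight, embedding.weight)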
pytorch_transformers/modeling_xlm.py

@@ -184,6 +184,10 @@ class XLMConfig(PretrainedConfig):
     def vocab_size(self):
         return self.n_words

+    @vocab_size.setter
+    def vocab_size(self, value):
+        self.n_words = value
+
     @property
     def hidden_size(self):
         return self.emb_dim

@@ -479,6 +483,7 @@ class XLMModel(XLMPreTrainedModel):
     def _resize_token_embeddings(self, new_num_tokens):
         self.embeddings = self._get_resized_embeddings(self.embeddings, new_num_tokens)
+        return self.embeddings

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.

@@ -728,10 +733,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     def tie_weights(self):
         """ Make sure we are sharing the embeddings
         """
-        if self.config.torchscript:
-            self.pred_layer.proj.weight = nn.Parameter(self.transformer.embeddings.weight.clone())
-        else:
-            self.pred_layer.proj.weight = self.transformer.embeddings.weight
+        self._tie_or_clone_weights(self.pred_layer.proj, self.transformer.embeddings)

     def forward(self, input_ids, lengths=None, positions=None, langs=None, token_type_ids=None,
                 attention_mask=None, cache=None, labels=None, head_mask=None):
pytorch_transformers/modeling_xlnet.py

@@ -316,6 +316,10 @@ class XLNetConfig(PretrainedConfig):
     def vocab_size(self):
         return self.n_token

+    @vocab_size.setter
+    def vocab_size(self, value):
+        self.n_token = value
+
     @property
     def hidden_size(self):
         return self.d_model

@@ -660,10 +664,10 @@ class XLNetModel(XLNetPreTrainedModel):
     def _resize_token_embeddings(self, new_num_tokens):
         self.word_embedding = self._get_resized_embeddings(self.word_embedding, new_num_tokens)
+        return self.word_embedding

     def _prune_heads(self, heads_to_prune):
-        logger.info("Head pruning is not implemented for XLNet")
-        pass
+        raise NotImplementedError

     def create_mask(self, qlen, mlen):
         """

@@ -987,10 +991,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
     def tie_weights(self):
         """ Make sure we are sharing the embeddings
         """
-        if self.config.torchscript:
-            self.lm_loss.weight = nn.Parameter(self.transformer.word_embedding.weight.clone())
-        else:
-            self.lm_loss.weight = self.transformer.word_embedding.weight
+        self._tie_or_clone_weights(self.lm_loss, self.transformer.word_embedding)

     def forward(self, input_ids, token_type_ids=None, input_mask=None, attention_mask=None,
                 mems=None, perm_mask=None, target_mapping=None, inp_q=None,
pytorch_transformers/tests/modeling_bert_test.py

@@ -26,10 +26,15 @@ from pytorch_transformers import (BertConfig, BertModel, BertForMaskedLM,
                                   BertForTokenClassification, BertForMultipleChoice)
 from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP

-from .modeling_common_test import (create_and_check_commons, ConfigTester, ids_tensor)
+from .modeling_common_test import (CommonTestCases, ConfigTester, ids_tensor)


-class BertModelTest(unittest.TestCase):
+class BertModelTest(CommonTestCases.CommonModelTester):
+
+    all_model_classes = (BertModel, BertForMaskedLM, BertForNextSentencePrediction,
+                         BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
+                         BertForTokenClassification)

     class BertModelTester(object):

         def __init__(self,

@@ -55,9 +60,6 @@ class BertModelTest(unittest.TestCase):
                      num_labels=3,
                      num_choices=4,
                      scope=None,
-                     all_model_classes=(BertModel, BertForMaskedLM, BertForNextSentencePrediction,
-                                        BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
-                                        BertForTokenClassification),
                     ):
             self.parent = parent
             self.batch_size = batch_size

@@ -81,7 +83,6 @@ class BertModelTest(unittest.TestCase):
             self.num_labels = num_labels
             self.num_choices = num_choices
             self.scope = scope
-            self.all_model_classes = all_model_classes

         def prepare_config_and_inputs(self):
             input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

@@ -253,52 +254,59 @@ class BertModelTest(unittest.TestCase):
             self.check_loss_output(result)

-        def create_and_check_bert_commons(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
+        def prepare_config_and_inputs_for_common(self):
+            config_and_inputs = self.prepare_config_and_inputs()
+            (config, input_ids, token_type_ids, input_mask,
+             sequence_labels, token_labels, choice_labels) = config_and_inputs
             inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask}
-            create_and_check_commons(self, config, inputs_dict)
+            return config, inputs_dict

-    def test_default(self):
-        self.run_tester(BertModelTest.BertModelTester(self))
+    def setUp(self):
+        self.model_tester = BertModelTest.BertModelTester(self)
+        self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)

     def test_config(self):
-        config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)
-        config_tester.run_common_tests()
+        self.config_tester.run_common_tests()

-    @pytest.mark.slow
-    def test_model_from_pretrained(self):
-        cache_dir = "/tmp/pytorch_transformers_test/"
-        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
-            model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)
-            shutil.rmtree(cache_dir)
-            self.assertIsNotNone(model)
+    def test_bert_model(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_model(*config_and_inputs)

-    def run_tester(self, tester):
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_model(*config_and_inputs)
+    def test_for_masked_lm(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_masked_lm(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
+    def test_for_multiple_choice(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
+    def test_for_next_sequence_prediction(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)
+    def test_for_pretraining(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_pretraining(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_pretraining(*config_and_inputs)
+    def test_for_question_answering(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_question_answering(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_question_answering(*config_and_inputs)
+    def test_for_sequence_classification(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)
+    def test_for_token_classification(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_token_classification(*config_and_inputs)

-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_token_classification(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_commons(*config_and_inputs)
+    @pytest.mark.slow
+    def test_model_from_pretrained(self):
+        cache_dir = "/tmp/pytorch_transformers_test/"
+        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
+            model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)
+            shutil.rmtree(cache_dir)
+            self.assertIsNotNone(model)

 if __name__ == "__main__":
     unittest.main()
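Because the common checks are now plain unittest methods inherited from CommonTestCases.CommonModelTester, they can be discovered and run with the standard unittest machinery. A hedged sketch, assuming the pytorch_transformers tests package is importable in the current environment:

    import unittest

    # Load only the BERT test case defined in the diff above.
    suite = unittest.defaultTestLoader.loadTestsFromName(
        "pytorch_transformers.tests.modeling_bert_test.BertModelTest")
    unittest.TextTestRunner(verbosity=2).run(suite)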
pytorch_transformers/tests/modeling_common_test.py
View file @
2918b7d2
...
@@ -39,207 +39,471 @@ def _config_zero_init(config):
...
@@ -39,207 +39,471 @@ def _config_zero_init(config):
setattr
(
configs_no_init
,
key
,
0.0
)
setattr
(
configs_no_init
,
key
,
0.0
)
return
configs_no_init
return
configs_no_init
def
_create_and_check_torchscript_output_attentions
(
tester
,
model_classes
,
config
,
inputs_dict
):
class
CommonTestCases
:
config
.
output_attentions
=
True
_create_and_check_torchscript
(
tester
,
model_classes
,
config
,
inputs_dict
)
class
CommonModelTester
(
unittest
.
TestCase
):
def
_create_and_check_torchscript_output_hidden_state
(
tester
,
model_classes
,
config
,
inputs_dict
):
model_tester
=
None
config
.
output_hidden_states
=
True
all_model_classes
=
()
_create_and_check_torchscript
(
tester
,
model_classes
,
config
,
inputs_dict
)
test_torchscript
=
True
test_pruning
=
True
def
_create_and_check_torchscript
(
tester
,
model_classes
,
config
,
inputs_dict
):
test_resize_embeddings
=
True
configs_no_init
=
_config_zero_init
(
config
)
# To be sure we have no Nan
configs_no_init
.
torchscript
=
True
def
test_initialization
(
self
):
for
model_class
in
model_classes
:
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
model
=
model_class
(
config
=
configs_no_init
)
model
.
eval
()
configs_no_init
=
_config_zero_init
(
config
)
inputs
=
inputs_dict
[
'input_ids'
]
# Let's keep only input_ids
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
=
configs_no_init
)
try
:
for
name
,
param
in
model
.
named_parameters
():
torch
.
jit
.
trace
(
model
,
inputs
)
if
param
.
requires_grad
:
except
RuntimeError
:
self
.
assertIn
(
param
.
data
.
mean
().
item
(),
[
0.0
,
1.0
],
tester
.
parent
.
fail
(
"Couldn't trace module."
)
msg
=
"Parameter {} of model {} seems not properly initialized"
.
format
(
name
,
model_class
))
try
:
def
test_attention_outputs
(
self
):
traced_gpt2
=
torch
.
jit
.
trace
(
model
,
inputs
)
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
torch
.
jit
.
save
(
traced_gpt2
,
"traced_model.pt"
)
except
RuntimeError
:
for
model_class
in
self
.
all_model_classes
:
tester
.
parent
.
fail
(
"Couldn't save module."
)
config
.
output_attentions
=
True
config
.
output_hidden_states
=
False
try
:
model
=
model_class
(
config
)
loaded_model
=
torch
.
jit
.
load
(
"traced_model.pt"
)
model
.
eval
()
os
.
remove
(
"traced_model.pt"
)
outputs
=
model
(
**
inputs_dict
)
except
ValueError
:
attentions
=
outputs
[
-
1
]
tester
.
parent
.
fail
(
"Couldn't load module."
)
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
False
)
model
.
eval
()
self
.
assertEqual
(
len
(
attentions
),
self
.
model_tester
.
num_hidden_layers
)
loaded_model
.
eval
()
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
model_params
=
model
.
parameters
()
[
self
.
model_tester
.
num_attention_heads
,
loaded_model_params
=
loaded_model
.
parameters
()
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq_length
])
models_equal
=
True
out_len
=
len
(
outputs
)
for
p1
,
p2
in
zip
(
model_params
,
loaded_model_params
):
if
p1
.
data
.
ne
(
p2
.
data
).
sum
()
>
0
:
# Check attention is always last and order is fine
models_equal
=
False
config
.
output_attentions
=
True
config
.
output_hidden_states
=
True
tester
.
parent
.
assertTrue
(
models_equal
)
model
=
model_class
(
config
)
model
.
eval
()
def
_create_and_check_initialization
(
tester
,
model_classes
,
config
,
inputs_dict
):
outputs
=
model
(
**
inputs_dict
)
configs_no_init
=
_config_zero_init
(
config
)
self
.
assertEqual
(
out_len
+
1
,
len
(
outputs
))
for
model_class
in
model_classes
:
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
model
=
model_class
(
config
=
configs_no_init
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
for
name
,
param
in
model
.
named_parameters
():
if
param
.
requires_grad
:
attentions
=
outputs
[
-
1
]
tester
.
parent
.
assertIn
(
param
.
data
.
mean
().
item
(),
[
0.0
,
1.0
],
self
.
assertEqual
(
len
(
attentions
),
self
.
model_tester
.
num_hidden_layers
)
msg
=
"Parameter {} of model {} seems not properly initialized"
.
format
(
name
,
model_class
))
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
def
_create_and_check_for_headmasking
(
tester
,
model_classes
,
config
,
inputs_dict
):
[
self
.
model_tester
.
num_attention_heads
,
configs_no_init
=
_config_zero_init
(
config
)
# To be sure we have no Nan
self
.
model_tester
.
seq_length
,
for
model_class
in
model_classes
:
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq_length
])
config
.
output_attentions
=
True
config
.
output_hidden_states
=
True
def
test_torchscript
(
self
):
model
=
model_class
(
config
=
configs_no_init
)
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
model
.
eval
()
self
.
_create_and_check_torchscript
(
config
,
inputs_dict
)
# Prepare head_mask
# Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
def
test_torchscript_output_attentions
(
self
):
head_mask
=
torch
.
ones
(
tester
.
num_hidden_layers
,
tester
.
num_attention_heads
)
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
head_mask
[
0
,
0
]
=
0
head_mask
[
-
1
,
:
-
1
]
=
0
config
.
output_attentions
=
True
head_mask
.
requires_grad_
(
requires_grad
=
True
)
self
.
_create_and_check_torchscript
(
config
,
inputs_dict
)
inputs
=
inputs_dict
.
copy
()
inputs
[
'head_mask'
]
=
head_mask
def
test_torchscript_output_hidden_state
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
outputs
=
model
(
**
inputs
)
config
.
output_hidden_states
=
True
# Test that we can get a gradient back for importance score computation
self
.
_create_and_check_torchscript
(
config
,
inputs_dict
)
output
=
sum
(
t
.
sum
()
for
t
in
outputs
[
0
])
output
=
output
.
sum
()
def
_create_and_check_torchscript
(
self
,
config
,
inputs_dict
):
output
.
backward
()
if
not
self
.
test_torchscript
:
multihead_outputs
=
head_mask
.
grad
return
attentions
=
outputs
[
-
1
]
configs_no_init
=
_config_zero_init
(
config
)
# To be sure we have no Nan
hidden_states
=
outputs
[
-
2
]
configs_no_init
.
torchscript
=
True
for
model_class
in
self
.
all_model_classes
:
# Remove Nan
model
=
model_class
(
config
=
configs_no_init
)
model
.
eval
()
tester
.
parent
.
assertIsNotNone
(
multihead_outputs
)
inputs
=
inputs_dict
[
'input_ids'
]
# Let's keep only input_ids
tester
.
parent
.
assertEqual
(
len
(
multihead_outputs
),
tester
.
num_hidden_layers
)
tester
.
parent
.
assertAlmostEqual
(
try
:
attentions
[
0
][...,
0
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
torch
.
jit
.
trace
(
model
,
inputs
)
tester
.
parent
.
assertNotEqual
(
except
RuntimeError
:
attentions
[
0
][...,
-
1
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
self
.
fail
(
"Couldn't trace module."
)
tester
.
parent
.
assertNotEqual
(
attentions
[
1
][...,
0
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
try
:
tester
.
parent
.
assertAlmostEqual
(
traced_gpt2
=
torch
.
jit
.
trace
(
model
,
inputs
)
attentions
[
-
1
][...,
-
2
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
torch
.
jit
.
save
(
traced_gpt2
,
"traced_model.pt"
)
tester
.
parent
.
assertNotEqual
(
except
RuntimeError
:
attentions
[
-
1
][...,
-
1
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
self
.
fail
(
"Couldn't save module."
)
try
:
def
_create_and_check_for_head_pruning
(
tester
,
model_classes
,
config
,
inputs_dict
):
loaded_model
=
torch
.
jit
.
load
(
"traced_model.pt"
)
for
model_class
in
model_classes
:
os
.
remove
(
"traced_model.pt"
)
config
.
output_attentions
=
True
except
ValueError
:
config
.
output_hidden_states
=
False
self
.
fail
(
"Couldn't load module."
)
model
=
model_class
(
config
=
config
)
model
.
eval
()
model
.
eval
()
heads_to_prune
=
{
0
:
list
(
range
(
1
,
tester
.
num_attention_heads
)),
loaded_model
.
eval
()
-
1
:
[
0
]}
model
.
prune_heads
(
heads_to_prune
)
model_params
=
model
.
parameters
()
outputs
=
model
(
**
inputs_dict
)
loaded_model_params
=
loaded_model
.
parameters
()
attentions
=
outputs
[
-
1
]
models_equal
=
True
for
p1
,
p2
in
zip
(
model_params
,
loaded_model_params
):
tester
.
parent
.
assertEqual
(
if
p1
.
data
.
ne
(
p2
.
data
).
sum
()
>
0
:
attentions
[
0
].
shape
[
-
3
],
1
)
models_equal
=
False
tester
.
parent
.
assertEqual
(
attentions
[
1
].
shape
[
-
3
],
tester
.
num_attention_heads
)
self
.
assertTrue
(
models_equal
)
tester
.
parent
.
assertEqual
(
attentions
[
-
1
].
shape
[
-
3
],
tester
.
num_attention_heads
-
1
)
def
test_headmasking
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
def
_create_and_check_for_attentions
(
tester
,
model_classes
,
config
,
inputs_dict
):
for
model_class
in
model_classes
:
config
.
output_attentions
=
True
config
.
output_attentions
=
True
config
.
output_hidden_states
=
True
config
.
output_hidden_states
=
False
configs_no_init
=
_config_zero_init
(
config
)
# To be sure we have no Nan
model
=
model_class
(
config
)
for
model_class
in
self
.
all_model_classes
:
model
.
eval
()
model
=
model_class
(
config
=
configs_no_init
)
outputs
=
model
(
**
inputs_dict
)
model
.
eval
()
attentions
=
outputs
[
-
1
]
tester
.
parent
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
# Prepare head_mask
tester
.
parent
.
assertEqual
(
model
.
config
.
output_hidden_states
,
False
)
# Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
tester
.
parent
.
assertEqual
(
len
(
attentions
),
tester
.
num_hidden_layers
)
head_mask
=
torch
.
ones
(
self
.
model_tester
.
num_hidden_layers
,
self
.
model_tester
.
num_attention_heads
)
tester
.
parent
.
assertListEqual
(
head_mask
[
0
,
0
]
=
0
list
(
attentions
[
0
].
shape
[
-
3
:]),
head_mask
[
-
1
,
:
-
1
]
=
0
[
tester
.
num_attention_heads
,
head_mask
.
requires_grad_
(
requires_grad
=
True
)
tester
.
seq_length
,
inputs
=
inputs_dict
.
copy
()
tester
.
key_len
if
hasattr
(
tester
,
'key_len'
)
else
tester
.
seq_length
])
inputs
[
'head_mask'
]
=
head_mask
out_len
=
len
(
outputs
)
outputs
=
model
(
**
inputs
)
# Check attention is always last and order is fine
config
.
output_attentions
=
True
# Test that we can get a gradient back for importance score computation
config
.
output_hidden_states
=
True
output
=
sum
(
t
.
sum
()
for
t
in
outputs
[
0
])
model
=
model_class
(
config
)
output
=
output
.
sum
()
model
.
eval
()
output
.
backward
()
outputs
=
model
(
**
inputs_dict
)
multihead_outputs
=
head_mask
.
grad
tester
.
parent
.
assertEqual
(
out_len
+
1
,
len
(
outputs
))
tester
.
parent
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
attentions
=
outputs
[
-
1
]
tester
.
parent
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
hidden_states
=
outputs
[
-
2
]
attentions
=
outputs
[
-
1
]
# Remove Nan
tester
.
parent
.
assertEqual
(
len
(
attentions
),
tester
.
num_hidden_layers
)
tester
.
parent
.
assertListEqual
(
self
.
assertIsNotNone
(
multihead_outputs
)
list
(
attentions
[
0
].
shape
[
-
3
:]),
self
.
assertEqual
(
len
(
multihead_outputs
),
self
.
model_tester
.
num_hidden_layers
)
[
tester
.
num_attention_heads
,
self
.
assertAlmostEqual
(
tester
.
seq_length
,
attentions
[
0
][...,
0
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
tester
.
key_len
if
hasattr
(
tester
,
'key_len'
)
else
tester
.
seq_length
])
self
.
assertNotEqual
(
attentions
[
0
][...,
-
1
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
def
_create_and_check_for_hidden_states
(
tester
,
model_classes
,
config
,
inputs_dict
):
self
.
assertNotEqual
(
for
model_class
in
model_classes
:
attentions
[
1
][...,
0
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
config
.
output_hidden_states
=
True
self
.
assertAlmostEqual
(
config
.
output_attentions
=
False
attentions
[
-
1
][...,
-
2
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
model
=
model_class
(
config
)
self
.
assertNotEqual
(
model
.
eval
()
attentions
[
-
1
][...,
-
1
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
outputs
=
model
(
**
inputs_dict
)
hidden_states
=
outputs
[
-
1
]
tester
.
parent
.
assertEqual
(
model
.
config
.
output_attentions
,
False
)
def
test_head_pruning
(
self
):
tester
.
parent
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
if
not
self
.
test_pruning
:
tester
.
parent
.
assertEqual
(
len
(
hidden_states
),
tester
.
num_hidden_layers
+
1
)
return
tester
.
parent
.
assertListEqual
(
list
(
hidden_states
[
0
].
shape
[
-
2
:]),
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
[
tester
.
seq_length
,
tester
.
hidden_size
])
for
model_class
in
self
.
all_model_classes
:
config
.
output_attentions
=
True
def
create_and_check_commons
(
tester
,
config
,
inputs_dict
,
test_pruning
=
True
,
test_torchscript
=
True
):
config
.
output_hidden_states
=
False
_create_and_check_initialization
(
tester
,
tester
.
all_model_classes
,
config
,
inputs_dict
)
model
=
model_class
(
config
=
config
)
_create_and_check_for_attentions
(
tester
,
tester
.
all_model_classes
,
config
,
inputs_dict
)
model
.
eval
()
_create_and_check_for_headmasking
(
tester
,
tester
.
all_model_classes
,
config
,
inputs_dict
)
heads_to_prune
=
{
0
:
list
(
range
(
1
,
self
.
model_tester
.
num_attention_heads
)),
_create_and_check_for_hidden_states
(
tester
,
tester
.
all_model_classes
,
config
,
inputs_dict
)
-
1
:
[
0
]}
model
.
prune_heads
(
heads_to_prune
)
if
test_torchscript
:
outputs
=
model
(
**
inputs_dict
)
_create_and_check_torchscript
(
tester
,
tester
.
all_model_classes
,
config
,
inputs_dict
)
_create_and_check_torchscript_output_attentions
(
tester
,
tester
.
all_model_classes
,
config
,
inputs_dict
)
attentions
=
outputs
[
-
1
]
_create_and_check_torchscript_output_hidden_state
(
tester
,
tester
.
all_model_classes
,
config
,
inputs_dict
)
self
.
assertEqual
(
if
test_pruning
:
attentions
[
0
].
shape
[
-
3
],
1
)
_create_and_check_for_head_pruning
(
tester
,
tester
.
all_model_classes
,
config
,
inputs_dict
)
self
.
assertEqual
(
attentions
[
1
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
self
.
assertEqual
(
attentions
[
-
1
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
def
test_hidden_states_output
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
for
model_class
in
self
.
all_model_classes
:
config
.
output_hidden_states
=
True
config
.
output_attentions
=
False
model
=
model_class
(
config
)
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
hidden_states
=
outputs
[
-
1
]
self
.
assertEqual
(
model
.
config
.
output_attentions
,
False
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
self
.
assertEqual
(
len
(
hidden_states
),
self
.
model_tester
.
num_hidden_layers
+
1
)
self
.
assertListEqual
(
list
(
hidden_states
[
0
].
shape
[
-
2
:]),
[
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
hidden_size
])
def
test_resize_tokens_embeddings
(
self
):
original_config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
if
not
self
.
test_resize_embeddings
:
return
for
model_class
in
self
.
all_model_classes
:
config
=
copy
.
deepcopy
(
original_config
)
model
=
model_class
(
config
)
model_vocab_size
=
config
.
vocab_size
# Retrieve the embeddings and clone theme
model_embed
=
model
.
resize_token_embeddings
(
model_vocab_size
)
cloned_embeddings
=
model_embed
.
weight
.
clone
()
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
model_embed
=
model
.
resize_token_embeddings
(
model_vocab_size
+
10
)
self
.
assertEqual
(
model
.
config
.
vocab_size
,
model_vocab_size
+
10
)
# Check that it actually resizes the embeddings matrix
self
.
assertEqual
(
model_embed
.
weight
.
shape
[
0
],
cloned_embeddings
.
shape
[
0
]
+
10
)
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
model_embed
=
model
.
resize_token_embeddings
(
model_vocab_size
-
15
)
self
.
assertEqual
(
model
.
config
.
vocab_size
,
model_vocab_size
-
15
)
# Check that it actually resizes the embeddings matrix
self
.
assertEqual
(
model_embed
.
weight
.
shape
[
0
],
cloned_embeddings
.
shape
[
0
]
-
15
)
# Check that adding and removing tokens has not modified the first part of the embedding matrix.
models_equal
=
True
for
p1
,
p2
in
zip
(
cloned_embeddings
,
model_embed
.
weight
):
if
p1
.
data
.
ne
(
p2
.
data
).
sum
()
>
0
:
models_equal
=
False
self
.
assertTrue
(
models_equal
)
def
test_tie_model_weights
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
def
check_same_values
(
layer_1
,
layer_2
):
equal
=
True
for
p1
,
p2
in
zip
(
layer_1
.
weight
,
layer_2
.
weight
):
if
p1
.
data
.
ne
(
p2
.
data
).
sum
()
>
0
:
equal
=
False
return
equal
for
model_class
in
self
.
all_model_classes
:
if
not
hasattr
(
model_class
,
'tie_weights'
):
continue
config
.
torchscript
=
True
model_not_tied
=
model_class
(
config
)
params_not_tied
=
list
(
model_not_tied
.
parameters
())
config_tied
=
copy
.
deepcopy
(
config
)
config_tied
.
torchscript
=
False
model_tied
=
model_class
(
config_tied
)
params_tied
=
list
(
model_tied
.
parameters
())
# Check that the embedding layer and decoding layer are the same in size and in value
self
.
assertGreater
(
len
(
params_not_tied
),
len
(
params_tied
))
# self.assertTrue(check_same_values(embeddings, decoding))
# # Check that after modification, they remain the same.
# embeddings.weight.data.div_(2)
# # Check that the embedding layer and decoding layer are the same in size and in value
# self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
# self.assertTrue(check_same_values(embeddings, decoding))
# # Check that after modification, they remain the same.
# decoding.weight.data.div_(4)
# # Check that the embedding layer and decoding layer are the same in size and in value
# self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
# self.assertTrue(check_same_values(embeddings, decoding))
# Check that after resize they remain tied.
model_tied
.
resize_token_embeddings
(
config
.
vocab_size
+
10
)
params_tied_2
=
list
(
model_tied
.
parameters
())
self
.
assertGreater
(
len
(
params_not_tied
),
len
(
params_tied
))
self
.
assertEqual
(
len
(
params_tied_2
),
len
(
params_tied
))
# decoding.weight.data.mul_(20)
# # Check that the embedding layer and decoding layer are the same in size and in value
# self.assertTrue(model.transformer.wte.weight.shape, model.lm_head.weight.shape)
# self.assertTrue(check_same_values(model.transformer.wte, model.lm_head))
class
GPTModelTester
(
CommonModelTester
):
def
__init__
(
self
,
parent
,
batch_size
=
13
,
seq_length
=
7
,
is_training
=
True
,
use_position_ids
=
True
,
use_token_type_ids
=
True
,
use_labels
=
True
,
vocab_size
=
99
,
n_positions
=
33
,
hidden_size
=
32
,
num_hidden_layers
=
5
,
num_attention_heads
=
4
,
n_choices
=
3
,
type_sequence_label_size
=
2
,
initializer_range
=
0.02
,
num_labels
=
3
,
scope
=
None
,
config_class
=
None
,
base_model_class
=
None
,
lm_head_model_class
=
None
,
double_head_model_class
=
None
,
):
self
.
parent
=
parent
self
.
batch_size
=
batch_size
self
.
seq_length
=
seq_length
self
.
is_training
=
is_training
self
.
use_position_ids
=
use_position_ids
self
.
use_token_type_ids
=
use_token_type_ids
self
.
use_labels
=
use_labels
self
.
vocab_size
=
vocab_size
self
.
n_positions
=
n_positions
self
.
hidden_size
=
hidden_size
self
.
num_hidden_layers
=
num_hidden_layers
self
.
num_attention_heads
=
num_attention_heads
self
.
n_choices
=
n_choices
self
.
type_sequence_label_size
=
type_sequence_label_size
self
.
initializer_range
=
initializer_range
self
.
num_labels
=
num_labels
self
.
scope
=
scope
self
.
config_class
=
config_class
self
.
base_model_class
=
base_model_class
self
.
lm_head_model_class
=
lm_head_model_class
self
.
double_head_model_class
=
double_head_model_class
self
.
all_model_classes
=
(
base_model_class
,
lm_head_model_class
,
double_head_model_class
)
def
prepare_config_and_inputs
(
self
):
total_num_tokens
=
self
.
vocab_size
input_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
],
total_num_tokens
)
position_ids
=
None
if
self
.
use_position_ids
:
position_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
],
self
.
n_positions
)
token_type_ids
=
None
if
self
.
use_token_type_ids
:
total_voc
=
self
.
vocab_size
token_type_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
],
total_voc
)
mc_labels
=
None
lm_labels
=
None
mc_token_ids
=
None
if
self
.
use_labels
:
mc_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
type_sequence_label_size
)
lm_labels
=
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
],
self
.
num_labels
)
mc_token_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
],
self
.
seq_length
)
config
=
self
.
config_class
(
vocab_size_or_config_json_file
=
self
.
vocab_size
,
n_positions
=
self
.
n_positions
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
initializer_range
=
self
.
initializer_range
)
return
(
config
,
input_ids
,
token_type_ids
,
position_ids
,
mc_labels
,
lm_labels
,
mc_token_ids
)
def
create_and_check_base_model
(
self
,
config
,
input_ids
,
token_type_ids
,
position_ids
,
mc_labels
,
lm_labels
,
mc_token_ids
):
model
=
self
.
base_model_class
(
config
)
model
.
eval
()
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
)
outputs
=
model
(
input_ids
,
position_ids
)
outputs
=
model
(
input_ids
)
def
ids_tensor
(
shape
,
vocab_size
,
rng
=
None
,
name
=
None
):
hidden_state
=
outputs
[
0
]
"""Creates a random int32 tensor of the shape within the vocab size."""
self
.
parent
.
assertListEqual
(
if
rng
is
None
:
list
(
hidden_state
.
size
()),
rng
=
random
.
Random
(
)
[
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
,
self
.
hidden_size
]
)
total_dims
=
1
for
dim
in
shape
:
total_dims
*=
dim
values
=
[]
def
create_and_check_lm_head
(
self
,
config
,
input_ids
,
token_type_ids
,
position_ids
,
for
_
in
range
(
total_dims
):
mc_labels
,
lm_labels
,
mc_token_ids
):
values
.
append
(
rng
.
randint
(
0
,
vocab_size
-
1
))
model
=
self
.
lm_head_model_class
(
config
)
model
.
eval
()
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
,
lm_labels
)
loss
,
lm_logits
=
outputs
[:
2
]
return
torch
.
tensor
(
data
=
values
,
dtype
=
torch
.
long
).
view
(
shape
).
contiguous
()
total_voc
=
self
.
vocab_size
self
.
parent
.
assertListEqual
(
list
(
lm_logits
.
size
()),
[
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
,
total_voc
])
self
.
parent
.
assertListEqual
(
list
(
loss
.
size
()),
[])
def
create_and_check_presents
(
self
,
config
,
input_ids
,
token_type_ids
,
position_ids
,
mc_labels
,
lm_labels
,
mc_token_ids
):
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
.
eval
()
outputs
=
model
(
input_ids
)
presents
=
outputs
[
-
1
]
self
.
parent
.
assertEqual
(
self
.
num_hidden_layers
,
len
(
presents
))
self
.
parent
.
assertListEqual
(
list
(
presents
[
0
].
size
()),
[
2
,
self
.
batch_size
*
self
.
n_choices
,
self
.
num_attention_heads
,
self
.
seq_length
,
self
.
hidden_size
//
self
.
num_attention_heads
])
def
create_and_check_double_heads
(
self
,
config
,
input_ids
,
token_type_ids
,
position_ids
,
mc_labels
,
lm_labels
,
mc_token_ids
):
model
=
self
.
double_head_model_class
(
config
)
model
.
eval
()
outputs
=
model
(
input_ids
,
mc_token_ids
,
lm_labels
=
lm_labels
,
mc_labels
=
mc_labels
,
token_type_ids
=
token_type_ids
,
position_ids
=
position_ids
)
lm_loss
,
mc_loss
,
lm_logits
,
mc_logits
=
outputs
[:
4
]
loss
=
[
lm_loss
,
mc_loss
]
total_voc
=
self
.
vocab_size
self
.
parent
.
assertListEqual
(
list
(
lm_logits
.
size
()),
[
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
,
total_voc
])
self
.
parent
.
assertListEqual
(
list
(
mc_logits
.
size
()),
[
self
.
batch_size
,
self
.
n_choices
])
self
.
parent
.
assertListEqual
(
[
list
(
l
.
size
())
for
l
in
loss
],
[[],
[]])
def
create_and_check_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/pytorch_transformers_test/"
for
model_name
in
list
(
self
.
base_model_class
.
pretrained_model_archive_map
.
keys
())[:
1
]:
model
=
self
.
base_model_class
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
self
.
parent
.
assertIsNotNone
(
model
)
def
prepare_config_and_inputs_for_common
(
self
):
config_and_inputs
=
self
.
prepare_config_and_inputs
()
(
config
,
input_ids
,
token_type_ids
,
position_ids
,
mc_labels
,
lm_labels
,
mc_token_ids
)
=
config_and_inputs
inputs_dict
=
{
'input_ids'
:
input_ids
}
return
config
,
inputs_dict
def
run_common_tests
(
self
,
test_presents
=
False
):
config_and_inputs
=
self
.
prepare_config_and_inputs
()
self
.
create_and_check_base_model
(
*
config_and_inputs
)
config_and_inputs
=
self
.
prepare_config_and_inputs
()
self
.
create_and_check_lm_head
(
*
config_and_inputs
)
config_and_inputs
=
self
.
prepare_config_and_inputs
()
self
.
create_and_check_double_heads
(
*
config_and_inputs
)
if
test_presents
:
config_and_inputs
=
self
.
prepare_config_and_inputs
()
self
.
create_and_check_presents
(
*
config_and_inputs
)
def
run_slow_tests
(
self
):
self
.
create_and_check_model_from_pretrained
()
class
ConfigTester
(
object
):
class
ConfigTester
(
object
):
...
@@ -275,179 +539,22 @@ class ConfigTester(object):
...
@@ -275,179 +539,22 @@ class ConfigTester(object):
self
.
create_and_test_config_to_json_file
()
self
.
create_and_test_config_to_json_file
()
class GPTModelTester(object):

    def __init__(self,
                 parent,
                 batch_size=13,
                 seq_length=7,
                 is_training=True,
                 use_position_ids=True,
                 use_token_type_ids=True,
                 use_labels=True,
                 vocab_size=99,
                 n_positions=33,
                 hidden_size=32,
                 num_hidden_layers=5,
                 num_attention_heads=4,
                 n_choices=3,
                 type_sequence_label_size=2,
                 initializer_range=0.02,
                 num_labels=3,
                 scope=None,
                 config_class=None,
                 base_model_class=None,
                 lm_head_model_class=None,
                 double_head_model_class=None,
                 ):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_position_ids = use_position_ids
        self.use_token_type_ids = use_token_type_ids
        self.use_labels = use_labels
        self.vocab_size = vocab_size
        self.n_positions = n_positions
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.n_choices = n_choices
        self.type_sequence_label_size = type_sequence_label_size
        self.initializer_range = initializer_range
        self.num_labels = num_labels
        self.scope = scope
        self.config_class = config_class
        self.base_model_class = base_model_class
        self.lm_head_model_class = lm_head_model_class
        self.double_head_model_class = double_head_model_class
        self.all_model_classes = (base_model_class, lm_head_model_class, double_head_model_class)

    def prepare_config_and_inputs(self):
        total_num_tokens = self.vocab_size
        input_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_num_tokens)

        position_ids = None
        if self.use_position_ids:
            position_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.n_positions)

        token_type_ids = None
        if self.use_token_type_ids:
            total_voc = self.vocab_size
            token_type_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_voc)

        mc_labels = None
        lm_labels = None
        mc_token_ids = None
        if self.use_labels:
            mc_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            lm_labels = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.num_labels)
            mc_token_ids = ids_tensor([self.batch_size, self.n_choices], self.seq_length)

        config = self.config_class(
            vocab_size_or_config_json_file=self.vocab_size,
            n_positions=self.n_positions,
            n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers,
            n_head=self.num_attention_heads,
            initializer_range=self.initializer_range)

        return (config, input_ids, token_type_ids, position_ids,
                mc_labels, lm_labels, mc_token_ids)

    def create_and_check_base_model(self, config, input_ids, token_type_ids, position_ids,
                                    mc_labels, lm_labels, mc_token_ids):
        model = self.base_model_class(config)
        model.eval()
        outputs = model(input_ids, position_ids, token_type_ids)
        outputs = model(input_ids, position_ids)
        outputs = model(input_ids)
        hidden_state = outputs[0]
        self.parent.assertListEqual(
            list(hidden_state.size()),
            [self.batch_size, self.n_choices, self.seq_length, self.hidden_size])

    def create_and_check_lm_head(self, config, input_ids, token_type_ids, position_ids,
                                 mc_labels, lm_labels, mc_token_ids):
        model = self.lm_head_model_class(config)
        model.eval()
        outputs = model(input_ids, position_ids, token_type_ids, lm_labels)
        loss, lm_logits = outputs[:2]
        total_voc = self.vocab_size
        self.parent.assertListEqual(
            list(lm_logits.size()),
            [self.batch_size, self.n_choices, self.seq_length, total_voc])
        self.parent.assertListEqual(
            list(loss.size()),
            [])

    def create_and_check_presents(self, config, input_ids, token_type_ids, position_ids,
                                  mc_labels, lm_labels, mc_token_ids):
        for model_class in self.all_model_classes:
            model = model_class(config)
            model.eval()
            outputs = model(input_ids)
            presents = outputs[-1]
            self.parent.assertEqual(self.num_hidden_layers, len(presents))
            self.parent.assertListEqual(
                list(presents[0].size()),
                [2, self.batch_size * self.n_choices, self.num_attention_heads,
                 self.seq_length, self.hidden_size // self.num_attention_heads])

    def create_and_check_double_heads(self, config, input_ids, token_type_ids, position_ids,
                                      mc_labels, lm_labels, mc_token_ids):
        model = self.double_head_model_class(config)
        model.eval()
        outputs = model(input_ids, mc_token_ids,
                        lm_labels=lm_labels, mc_labels=mc_labels,
                        token_type_ids=token_type_ids, position_ids=position_ids)
        lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
        loss = [lm_loss, mc_loss]
        total_voc = self.vocab_size
        self.parent.assertListEqual(
            list(lm_logits.size()),
            [self.batch_size, self.n_choices, self.seq_length, total_voc])
        self.parent.assertListEqual(
            list(mc_logits.size()),
            [self.batch_size, self.n_choices])
        self.parent.assertListEqual(
            [list(l.size()) for l in loss],
            [[], []])

    def create_and_check_model_from_pretrained(self):
        cache_dir = "/tmp/pytorch_transformers_test/"
        for model_name in list(self.base_model_class.pretrained_model_archive_map.keys())[:1]:
            model = self.base_model_class.from_pretrained(model_name, cache_dir=cache_dir)
            shutil.rmtree(cache_dir)
            self.parent.assertIsNotNone(model)

    def create_and_check_commons(self, config, input_ids, token_type_ids, position_ids,
                                 mc_labels, lm_labels, mc_token_ids):
        inputs_dict = {'input_ids': input_ids}
        create_and_check_commons(self, config, inputs_dict)

    def run_common_tests(self, test_presents=False):
        config_and_inputs = self.prepare_config_and_inputs()
        self.create_and_check_base_model(*config_and_inputs)

        config_and_inputs = self.prepare_config_and_inputs()
        self.create_and_check_lm_head(*config_and_inputs)

        config_and_inputs = self.prepare_config_and_inputs()
        self.create_and_check_double_heads(*config_and_inputs)

        if test_presents:
            config_and_inputs = self.prepare_config_and_inputs()
            self.create_and_check_presents(*config_and_inputs)

        config_and_inputs = self.prepare_config_and_inputs()
        self.create_and_check_commons(*config_and_inputs)

    def run_slow_tests(self):
        self.create_and_check_model_from_pretrained()


def ids_tensor(shape, vocab_size, rng=None, name=None):
    """Creates a random int32 tensor of the shape within the vocab size."""
    if rng is None:
        rng = random.Random()

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.randint(0, vocab_size - 1))

    return torch.tensor(data=values, dtype=torch.long).view(shape).contiguous()
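For reference, a quick illustrative use of the ids_tensor helper above. The sizes mirror the tester defaults (batch_size=13, n_choices=3, seq_length=7, vocab_size=99); the variable names are not from the commit, and ids_tensor is assumed to be in scope as defined above.

import random

# Build a reproducible dummy batch of token ids, as the testers do.
rng = random.Random(0)                      # fixed seed for reproducibility
dummy_input_ids = ids_tensor([13, 3, 7], 99, rng=rng)
print(dummy_input_ids.shape)                # torch.Size([13, 3, 7])
print(int(dummy_input_ids.max()) < 99)      # True: values stay within the vocab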
class ModelUtilsTest(unittest.TestCase):
...
@@ -471,79 +578,6 @@ class ModelUtilsTest(unittest.TestCase):
            self.assertEqual(model.config.output_hidden_states, True)
            self.assertEqual(model.config, config)
    def test_resize_tokens_embeddings(self):
        logging.basicConfig(level=logging.INFO)
        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            config = BertConfig.from_pretrained(model_name)
            model = BertModel.from_pretrained(model_name)

            model_vocab_size = config.vocab_size
            # Retrieve the embeddings and clone them
            cloned_embeddings = model.embeddings.word_embeddings.weight.clone()

            # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
            model.resize_token_embeddings(model_vocab_size + 10)
            self.assertEqual(model.config.vocab_size, model_vocab_size + 10)

            # Check that it actually resizes the embeddings matrix
            self.assertEqual(model.embeddings.word_embeddings.weight.shape[0], cloned_embeddings.shape[0] + 10)

            # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
            model.resize_token_embeddings(model_vocab_size)
            self.assertEqual(model.config.vocab_size, model_vocab_size)

            # Check that it actually resizes the embeddings matrix
            self.assertEqual(model.embeddings.word_embeddings.weight.shape[0], cloned_embeddings.shape[0])

            # Check that adding and removing tokens has not modified the first part of the embedding matrix.
            models_equal = True
            for p1, p2 in zip(cloned_embeddings, model.embeddings.word_embeddings.weight):
                if p1.data.ne(p2.data).sum() > 0:
                    models_equal = False

            self.assertTrue(models_equal)
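The assertions above expect resizing to copy the embedding matrix row-for-row into a larger (or smaller) matrix. A minimal sketch of that idea in plain PyTorch; resize_embedding is an illustrative helper, not the library's _get_resized_embeddings implementation.

import torch
import torch.nn as nn

def resize_embedding(old: nn.Embedding, new_num_tokens: int) -> nn.Embedding:
    """Return a new embedding with new_num_tokens rows, copying the overlapping rows."""
    old_num_tokens, dim = old.weight.shape
    new = nn.Embedding(new_num_tokens, dim)
    num_to_copy = min(old_num_tokens, new_num_tokens)
    # Rows shared by the old and new vocabulary keep their trained values,
    # which is what the models_equal loop in the test verifies.
    new.weight.data[:num_to_copy, :] = old.weight.data[:num_to_copy, :]
    return new

emb = nn.Embedding(99, 32)
bigger = resize_embedding(emb, 99 + 10)
assert bigger.weight.shape[0] == 109
assert torch.equal(bigger.weight.data[:99], emb.weight.data)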
    def test_tie_model_weights(self):
        logging.basicConfig(level=logging.INFO)

        def check_same_values(layer_1, layer_2):
            equal = True
            for p1, p2 in zip(layer_1.weight, layer_2.weight):
                if p1.data.ne(p2.data).sum() > 0:
                    equal = False
            return equal

        for model_name in list(GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            config = GPT2Config.from_pretrained(model_name)
            model = GPT2LMHeadModel.from_pretrained(model_name)

            # Get the embeddings and decoding layer
            embeddings = model.transformer.wte
            decoding = model.lm_head

            # Check that the embedding layer and decoding layer are the same in size and in value
            self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
            self.assertTrue(check_same_values(embeddings, decoding))

            # Check that after modification, they remain the same.
            embeddings.weight.data.div_(2)
            # Check that the embedding layer and decoding layer are the same in size and in value
            self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
            self.assertTrue(check_same_values(embeddings, decoding))

            # Check that after modification, they remain the same.
            decoding.weight.data.div_(4)
            # Check that the embedding layer and decoding layer are the same in size and in value
            self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
            self.assertTrue(check_same_values(embeddings, decoding))

            # Check that after resize they remain tied.
            model.resize_token_embeddings(config.vocab_size + 10)
            decoding.weight.data.mul_(20)
            # Check that the embedding layer and decoding layer are the same in size and in value
            self.assertTrue(model.transformer.wte.weight.shape, model.lm_head.weight.shape)
            self.assertTrue(check_same_values(model.transformer.wte, model.lm_head))


if __name__ == "__main__":
    unittest.main()
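The tying behaviour exercised above boils down to the input embedding and the output projection sharing one parameter tensor. A minimal, generic PyTorch sketch of that idea (illustrative only; this is not the library's _tie_or_clone_weights code):

import torch
import torch.nn as nn

vocab_size, hidden_size = 99, 32
embeddings = nn.Embedding(vocab_size, hidden_size)
lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

# Tie: both modules now reference the same Parameter object.
lm_head.weight = embeddings.weight

# In-place updates on one side are visible on the other,
# which is what check_same_values keeps asserting in the test.
embeddings.weight.data.div_(2)
assert lm_head.weight is embeddings.weight
assert torch.equal(embeddings.weight.data, lm_head.weight.data)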
pytorch_transformers/tests/modeling_gpt2_test.py
View file @ 2918b7d2
...
@@ -16,19 +16,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import unittest
import json
import random
import shutil
import pytest

import torch

from pytorch_transformers import (GPT2Config, GPT2Model,
                                  GPT2LMHeadModel, GPT2DoubleHeadsModel)
from .modeling_common_test import (create_and_check_commons, ConfigTester, GPTModelTester)
from .modeling_common_test import CommonTestCases, ConfigTester


class GPT2ModelTest(unittest.TestCase):
...
@@ -37,14 +32,14 @@ class GPT2ModelTest(unittest.TestCase):
        config_tester.run_common_tests()

    def test_model(self):
        model_tester = GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
        model_tester = CommonTestCases.GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
                                      lm_head_model_class=GPT2LMHeadModel,
                                      double_head_model_class=GPT2DoubleHeadsModel)
        model_tester.run_common_tests(test_presents=True)

    @pytest.mark.slow
    def test_pretrained(self):
        model_tester = GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
        model_tester = CommonTestCases.GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
                                      lm_head_model_class=GPT2LMHeadModel,
                                      double_head_model_class=GPT2DoubleHeadsModel)
        model_tester.run_slow_tests()
...
pytorch_transformers/tests/modeling_openai_test.py
View file @ 2918b7d2
...
@@ -19,12 +19,11 @@ from __future__ import print_function
import unittest
import pytest

import torch

from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel,
                                  OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
from .modeling_common_test import (create_and_check_commons, ConfigTester, GPTModelTester)
from .modeling_common_test import CommonTestCases, ConfigTester


class OpenAIModelTest(unittest.TestCase):
...
@@ -33,14 +32,14 @@ class OpenAIModelTest(unittest.TestCase):
        config_tester.run_common_tests()

    def test_model(self):
        model_tester = GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
        model_tester = CommonTestCases.GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
                                      lm_head_model_class=OpenAIGPTLMHeadModel,
                                      double_head_model_class=OpenAIGPTDoubleHeadsModel)
        model_tester.run_common_tests(test_presents=False)

    @pytest.mark.slow
    def test_pretrained(self):
        model_tester = GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
        model_tester = CommonTestCases.GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
                                      lm_head_model_class=OpenAIGPTLMHeadModel,
                                      double_head_model_class=OpenAIGPTDoubleHeadsModel)
        model_tester.run_slow_tests()
...
pytorch_transformers/tests/modeling_transfo_xl_test.py
View file @ 2918b7d2
...
@@ -28,9 +28,15 @@ import torch
from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
from pytorch_transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_common_test import ConfigTester, create_and_check_commons, ids_tensor
from .modeling_common_test import ConfigTester, CommonTestCases, ids_tensor


class TransfoXLModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel)
    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False

class TransfoXLModelTest(unittest.TestCase):
    class TransfoXLModelTester(object):

        def __init__(self,
...
@@ -52,7 +58,6 @@ class TransfoXLModelTest(unittest.TestCase):
                     num_hidden_layers=5,
                     scope=None,
                     seed=1,
                     all_model_classes=(TransfoXLModel, TransfoXLLMHeadModel),
                     ):
            self.parent = parent
            self.batch_size = batch_size
...
@@ -73,7 +78,6 @@ class TransfoXLModelTest(unittest.TestCase):
            self.num_hidden_layers = num_hidden_layers
            self.scope = scope
            self.seed = seed
            self.all_model_classes = all_model_classes

        def prepare_config_and_inputs(self):
            input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...
@@ -171,16 +175,31 @@ class TransfoXLModelTest(unittest.TestCase):
                list(list(mem.size()) for mem in result["mems_2"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers)

        def create_and_check_transfo_xl_commons(self, config, input_ids_1, input_ids_2, lm_labels):
        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
            inputs_dict = {'input_ids': input_ids_1}
            create_and_check_commons(self, config, inputs_dict, test_pruning=False, test_torchscript=False)
            return config, inputs_dict

    def test_default(self):
    def setUp(self):
        self.run_tester(TransfoXLModelTest.TransfoXLModelTester(self))
        self.model_tester = TransfoXLModelTest.TransfoXLModelTester(self)
        self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)

    def test_config(self):
        config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
        config_tester.run_common_tests()
        self.config_tester.run_common_tests()

    def test_transfo_xl_model(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        output_result = self.model_tester.create_transfo_xl_model(*config_and_inputs)
        self.model_tester.check_transfo_xl_model_output(output_result)

    def test_transfo_xl_lm_head(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
        self.model_tester.check_transfo_xl_lm_head_output(output_result)

    @pytest.mark.slow
    def test_model_from_pretrained(self):
...
@@ -190,23 +209,6 @@ class TransfoXLModelTest(unittest.TestCase):
            shutil.rmtree(cache_dir)
            self.assertIsNotNone(model)

    def run_tester(self, tester):
        config_and_inputs = tester.prepare_config_and_inputs()

        tester.set_seed()
        config_and_inputs = tester.prepare_config_and_inputs()
        output_result = tester.create_transfo_xl_model(*config_and_inputs)
        tester.check_transfo_xl_model_output(output_result)

        tester.set_seed()
        config_and_inputs = tester.prepare_config_and_inputs()
        output_result = tester.create_transfo_xl_lm_head(*config_and_inputs)
        tester.check_transfo_xl_lm_head_output(output_result)

        tester.set_seed()
        config_and_inputs = tester.prepare_config_and_inputs()
        tester.create_and_check_transfo_xl_commons(*config_and_inputs)


if __name__ == "__main__":
    unittest.main()
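The model tests above now subclass CommonTestCases.CommonModelTester and only supply all_model_classes, a few skip flags, and prepare_config_and_inputs_for_common(). The base class itself is not shown in this hunk, so the sketch below only illustrates how such a harness could consume those pieces; the function name run_generic_checks and its exact checks are assumptions, not code from the commit.

import torch

def run_generic_checks(model_tester, all_model_classes, test_pruning=True):
    # Hypothetical driver: every model class must build from the shared config
    # and accept the shared inputs_dict returned by the model-specific tester.
    config, inputs_dict = model_tester.prepare_config_and_inputs_for_common()
    for model_class in all_model_classes:
        model = model_class(config)
        model.eval()
        with torch.no_grad():
            outputs = model(**inputs_dict)
        assert outputs is not None
        if test_pruning:
            # head-pruning checks would go here; TransfoXL opts out via test_pruning = False
            pass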
pytorch_transformers/tests/modeling_xlm_test.py
View file @ 2918b7d2
...
@@ -23,10 +23,15 @@ import pytest
from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel,
                                  XLMForQuestionAnswering, XLMForSequenceClassification)
from pytorch_transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_common_test import (create_and_check_commons, ConfigTester, ids_tensor)
from .modeling_common_test import (CommonTestCases, ConfigTester, ids_tensor)


class XLMModelTest(unittest.TestCase):
class XLMModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (XLMModel, XLMWithLMHeadModel,
                         XLMForQuestionAnswering, XLMForSequenceClassification)
                         # , XLMForSequenceClassification, XLMForTokenClassification),

    class XLMModelTester(object):

        def __init__(self,
...
@@ -58,8 +63,6 @@ class XLMModelTest(unittest.TestCase):
                     summary_type="last",
                     use_proj=True,
                     scope=None,
                     all_model_classes=(XLMModel, XLMWithLMHeadModel,
                                        XLMForQuestionAnswering, XLMForSequenceClassification),
                     # , XLMForSequenceClassification, XLMForTokenClassification),
                     ):
            self.parent = parent
            self.batch_size = batch_size
...
@@ -90,7 +93,6 @@ class XLMModelTest(unittest.TestCase):
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
            self.all_model_classes = all_model_classes

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...
@@ -237,28 +239,23 @@ class XLMModelTest(unittest.TestCase):
                [self.batch_size, self.type_sequence_label_size])

        def create_and_check_xlm_commons(self, config, input_ids, token_type_ids, input_lengths,
                                         sequence_labels, token_labels, is_impossible_labels, input_mask):
        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids, token_type_ids, input_lengths,
             sequence_labels, token_labels, is_impossible_labels, input_mask) = config_and_inputs
            inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'lengths': input_lengths}
            create_and_check_commons(self, config, inputs_dict)
            return config, inputs_dict

    def test_default(self):
    def setUp(self):
        self.run_tester(XLMModelTest.XLMModelTester(self))
        self.model_tester = XLMModelTest.XLMModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)

    def test_config(self):
        config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
        config_tester.run_common_tests()
        self.config_tester.run_common_tests()

    @pytest.mark.slow
    def test_model_from_pretrained(self):
        cache_dir = "/tmp/pytorch_transformers_test/"
        for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir)
            shutil.rmtree(cache_dir)
            self.assertIsNotNone(model)

    def run_tester(self, tester):
    def test_xlm_model(self):
        config_and_inputs = tester.prepare_config_and_inputs()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        tester.create_and_check_xlm_model(*config_and_inputs)
        self.model_tester.create_and_check_xlm_model(*config_and_inputs)

        # config_and_inputs = tester.prepare_config_and_inputs()
        # tester.create_and_check_xlm_for_masked_lm(*config_and_inputs)
...
@@ -275,8 +272,14 @@ class XLMModelTest(unittest.TestCase):
        # config_and_inputs = tester.prepare_config_and_inputs()
        # tester.create_and_check_xlm_for_token_classification(*config_and_inputs)

        config_and_inputs = tester.prepare_config_and_inputs()
        tester.create_and_check_xlm_commons(*config_and_inputs)

    @pytest.mark.slow
    def test_model_from_pretrained(self):
        cache_dir = "/tmp/pytorch_transformers_test/"
        for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir)
            shutil.rmtree(cache_dir)
            self.assertIsNotNone(model)


if __name__ == "__main__":
    unittest.main()
pytorch_transformers/tests/modeling_xlnet_test.py
View file @ 2918b7d2
...
@@ -28,9 +28,14 @@ import torch
from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel,
                                  XLNetForSequenceClassification, XLNetForQuestionAnswering)
from pytorch_transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_common_test import ConfigTester, create_and_check_commons, ids_tensor
from .modeling_common_test import ConfigTester, CommonTestCases, ids_tensor


class XLNetModelTest(unittest.TestCase):
class XLNetModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (XLNetModel, XLNetLMHeadModel,
                         XLNetForSequenceClassification, XLNetForQuestionAnswering)
    test_pruning = False

    class XLNetModelTester(object):

        def __init__(self,
...
@@ -56,8 +61,6 @@ class XLNetModelTest(unittest.TestCase):
                     initializer_range=0.05,
                     seed=1,
                     type_vocab_size=2,
                     all_model_classes=(XLNetModel, XLNetLMHeadModel,
                                        XLNetForSequenceClassification, XLNetForQuestionAnswering),
                     ):
            self.parent = parent
            self.batch_size = batch_size
...
@@ -82,7 +85,6 @@ class XLNetModelTest(unittest.TestCase):
            self.seed = seed
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.all_model_classes = all_model_classes

        def prepare_config_and_inputs(self):
            input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...
@@ -264,17 +266,41 @@ class XLNetModelTest(unittest.TestCase):
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers)

        def create_and_check_xlnet_commons(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
                                           target_mapping, inp_q, segment_ids, lm_labels,
                                           sequence_labels, is_impossible_labels):
        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
             target_mapping, inp_q, segment_ids, lm_labels,
             sequence_labels, is_impossible_labels) = config_and_inputs
            inputs_dict = {'input_ids': input_ids_1}
            create_and_check_commons(self, config, inputs_dict, test_pruning=False)
            return config, inputs_dict

    def test_default(self):
    def setUp(self):
        self.run_tester(XLNetModelTest.XLNetModelTester(self))
        self.model_tester = XLNetModelTest.XLNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)

    def test_config(self):
        config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
        config_tester.run_common_tests()
        self.config_tester.run_common_tests()

    def test_xlnet_base_model(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)

    def test_xlnet_lm_head(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)

    def test_xlnet_sequence_classif(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)

    def test_xlnet_qa(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)

    @pytest.mark.slow
    def test_model_from_pretrained(self):
...
@@ -284,27 +310,6 @@ class XLNetModelTest(unittest.TestCase):
            shutil.rmtree(cache_dir)
            self.assertIsNotNone(model)

    def run_tester(self, tester):
        tester.set_seed()
        config_and_inputs = tester.prepare_config_and_inputs()
        tester.create_and_check_xlnet_base_model(*config_and_inputs)

        tester.set_seed()
        config_and_inputs = tester.prepare_config_and_inputs()
        tester.create_and_check_xlnet_lm_head(*config_and_inputs)

        tester.set_seed()
        config_and_inputs = tester.prepare_config_and_inputs()
        tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)

        tester.set_seed()
        config_and_inputs = tester.prepare_config_and_inputs()
        tester.create_and_check_xlnet_qa(*config_and_inputs)

        tester.set_seed()
        config_and_inputs = tester.prepare_config_and_inputs()
        tester.create_and_check_xlnet_commons(*config_and_inputs)


if __name__ == "__main__":
    unittest.main()