Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
1ab25c49
Commit
1ab25c49
authored
Dec 21, 2019
by
thomwolf
Browse files
Merge branch 'master' into pr/2115
parents
df396112
18601c3b
Changes
143
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
653 additions
and
205 deletions
+653
-205
transformers/tests/modeling_common_test.py
transformers/tests/modeling_common_test.py
+110
-70
transformers/tests/modeling_ctrl_test.py
transformers/tests/modeling_ctrl_test.py
+3
-6
transformers/tests/modeling_distilbert_test.py
transformers/tests/modeling_distilbert_test.py
+3
-5
transformers/tests/modeling_gpt2_test.py
transformers/tests/modeling_gpt2_test.py
+3
-6
transformers/tests/modeling_openai_test.py
transformers/tests/modeling_openai_test.py
+3
-6
transformers/tests/modeling_roberta_test.py
transformers/tests/modeling_roberta_test.py
+55
-6
transformers/tests/modeling_t5_test.py
transformers/tests/modeling_t5_test.py
+182
-0
transformers/tests/modeling_tf_albert_test.py
transformers/tests/modeling_tf_albert_test.py
+4
-9
transformers/tests/modeling_tf_auto_test.py
transformers/tests/modeling_tf_auto_test.py
+14
-9
transformers/tests/modeling_tf_bert_test.py
transformers/tests/modeling_tf_bert_test.py
+3
-6
transformers/tests/modeling_tf_common_test.py
transformers/tests/modeling_tf_common_test.py
+79
-31
transformers/tests/modeling_tf_ctrl_test.py
transformers/tests/modeling_tf_ctrl_test.py
+3
-6
transformers/tests/modeling_tf_distilbert_test.py
transformers/tests/modeling_tf_distilbert_test.py
+3
-5
transformers/tests/modeling_tf_gpt2_test.py
transformers/tests/modeling_tf_gpt2_test.py
+3
-6
transformers/tests/modeling_tf_openai_gpt_test.py
transformers/tests/modeling_tf_openai_gpt_test.py
+3
-6
transformers/tests/modeling_tf_roberta_test.py
transformers/tests/modeling_tf_roberta_test.py
+3
-6
transformers/tests/modeling_tf_t5_test.py
transformers/tests/modeling_tf_t5_test.py
+169
-0
transformers/tests/modeling_tf_transfo_xl_test.py
transformers/tests/modeling_tf_transfo_xl_test.py
+4
-7
transformers/tests/modeling_tf_xlm_test.py
transformers/tests/modeling_tf_xlm_test.py
+3
-6
transformers/tests/modeling_tf_xlnet_test.py
transformers/tests/modeling_tf_xlnet_test.py
+3
-9
No files found.
transformers/tests/modeling_common_test.py
View file @
1ab25c49
...
@@ -18,7 +18,7 @@ from __future__ import print_function
...
@@ -18,7 +18,7 @@ from __future__ import print_function
import
copy
import
copy
import
sys
import
sys
import
os
import
os
.path
import
shutil
import
shutil
import
tempfile
import
tempfile
import
json
import
json
...
@@ -30,7 +30,7 @@ import logging
...
@@ -30,7 +30,7 @@ import logging
from
transformers
import
is_torch_available
from
transformers
import
is_torch_available
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
...
@@ -58,7 +58,7 @@ else:
...
@@ -58,7 +58,7 @@ else:
def
_config_zero_init
(
config
):
def
_config_zero_init
(
config
):
configs_no_init
=
copy
.
deepcopy
(
config
)
configs_no_init
=
copy
.
deepcopy
(
config
)
for
key
in
configs_no_init
.
__dict__
.
keys
():
for
key
in
configs_no_init
.
__dict__
.
keys
():
if
'_range'
in
key
or
'_std'
in
key
:
if
'_range'
in
key
or
'_std'
in
key
or
'initializer_factor'
in
key
:
setattr
(
configs_no_init
,
key
,
0.0
)
setattr
(
configs_no_init
,
key
,
0.0
)
return
configs_no_init
return
configs_no_init
...
@@ -73,6 +73,7 @@ class CommonTestCases:
...
@@ -73,6 +73,7 @@ class CommonTestCases:
test_pruning
=
True
test_pruning
=
True
test_resize_embeddings
=
True
test_resize_embeddings
=
True
test_head_masking
=
True
test_head_masking
=
True
is_encoder_decoder
=
False
def
test_save_load
(
self
):
def
test_save_load
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
@@ -83,6 +84,8 @@ class CommonTestCases:
...
@@ -83,6 +84,8 @@ class CommonTestCases:
model
.
eval
()
model
.
eval
()
with
torch
.
no_grad
():
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
outputs
=
model
(
**
inputs_dict
)
out_2
=
outputs
[
0
].
numpy
()
out_2
[
np
.
isnan
(
out_2
)]
=
0
with
TemporaryDirectory
()
as
tmpdirname
:
with
TemporaryDirectory
()
as
tmpdirname
:
model
.
save_pretrained
(
tmpdirname
)
model
.
save_pretrained
(
tmpdirname
)
...
@@ -93,9 +96,7 @@ class CommonTestCases:
...
@@ -93,9 +96,7 @@ class CommonTestCases:
# Make sure we don't have nans
# Make sure we don't have nans
out_1
=
after_outputs
[
0
].
cpu
().
numpy
()
out_1
=
after_outputs
[
0
].
cpu
().
numpy
()
out_2
=
outputs
[
0
].
cpu
().
numpy
()
out_1
[
np
.
isnan
(
out_1
)]
=
0
out_1
=
out_1
[
~
np
.
isnan
(
out_1
)]
out_2
=
out_2
[
~
np
.
isnan
(
out_2
)]
max_diff
=
np
.
amax
(
np
.
abs
(
out_1
-
out_2
))
max_diff
=
np
.
amax
(
np
.
abs
(
out_1
-
out_2
))
self
.
assertLessEqual
(
max_diff
,
1e-5
)
self
.
assertLessEqual
(
max_diff
,
1e-5
)
...
@@ -117,20 +118,32 @@ class CommonTestCases:
...
@@ -117,20 +118,32 @@ class CommonTestCases:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
first
,
second
=
model
(
inputs_dict
[
"input_ids"
])[
0
],
model
(
inputs_dict
[
"input_ids"
])[
0
]
with
torch
.
no_grad
():
self
.
assertEqual
(
first
.
ne
(
second
).
sum
().
item
(),
0
)
first
=
model
(
**
inputs_dict
)[
0
]
second
=
model
(
**
inputs_dict
)[
0
]
out_1
=
first
.
cpu
().
numpy
()
out_2
=
second
.
cpu
().
numpy
()
out_1
=
out_1
[
~
np
.
isnan
(
out_1
)]
out_2
=
out_2
[
~
np
.
isnan
(
out_2
)]
max_diff
=
np
.
amax
(
np
.
abs
(
out_1
-
out_2
))
self
.
assertLessEqual
(
max_diff
,
1e-5
)
def
test_attention_outputs
(
self
):
def
test_attention_outputs
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
decoder_seq_length
=
self
.
model_tester
.
decoder_seq_length
if
hasattr
(
self
.
model_tester
,
'decoder_seq_length'
)
else
self
.
model_tester
.
seq_length
encoder_seq_length
=
self
.
model_tester
.
encoder_seq_length
if
hasattr
(
self
.
model_tester
,
'encoder_seq_length'
)
else
self
.
model_tester
.
seq_length
decoder_key_length
=
self
.
model_tester
.
key_length
if
hasattr
(
self
.
model_tester
,
'key_length'
)
else
decoder_seq_length
encoder_key_length
=
self
.
model_tester
.
key_length
if
hasattr
(
self
.
model_tester
,
'key_length'
)
else
encoder_seq_length
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
config
.
output_attentions
=
True
config
.
output_attentions
=
True
config
.
output_hidden_states
=
False
config
.
output_hidden_states
=
False
model
=
model_class
(
config
)
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
False
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
False
)
...
@@ -138,28 +151,42 @@ class CommonTestCases:
...
@@ -138,28 +151,42 @@ class CommonTestCases:
self
.
assertListEqual
(
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
list
(
attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
[
self
.
model_tester
.
num_attention_heads
,
self
.
model_tester
.
seq_length
,
encoder_
seq_length
,
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq
_length
])
encoder_key
_length
])
out_len
=
len
(
outputs
)
out_len
=
len
(
outputs
)
if
self
.
is_encoder_decoder
:
self
.
assertEqual
(
out_len
%
2
,
0
)
decoder_attentions
=
outputs
[(
out_len
//
2
)
-
1
]
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
False
)
self
.
assertEqual
(
len
(
decoder_attentions
),
self
.
model_tester
.
num_hidden_layers
)
self
.
assertListEqual
(
list
(
decoder_attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
decoder_seq_length
,
decoder_key_length
])
# Check attention is always last and order is fine
# Check attention is always last and order is fine
config
.
output_attentions
=
True
config
.
output_attentions
=
True
config
.
output_hidden_states
=
True
config
.
output_hidden_states
=
True
model
=
model_class
(
config
)
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
self
.
assertEqual
(
out_len
+
1
,
len
(
outputs
))
outputs
=
model
(
**
inputs_dict
)
self
.
assertEqual
(
out_len
+
(
2
if
self
.
is_encoder_decoder
else
1
),
len
(
outputs
))
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
attentions
=
outputs
[
-
1
]
self_
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
len
(
attentions
),
self
.
model_tester
.
num_hidden_layers
)
self
.
assertEqual
(
len
(
self_
attentions
),
self
.
model_tester
.
num_hidden_layers
)
self
.
assertListEqual
(
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
list
(
self_
attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
[
self
.
model_tester
.
num_attention_heads
,
self
.
model_tester
.
seq_length
,
encoder_
seq_length
,
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq
_length
])
encoder_key
_length
])
def
test_torchscript
(
self
):
def
test_torchscript
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
@@ -191,21 +218,22 @@ class CommonTestCases:
...
@@ -191,21 +218,22 @@ class CommonTestCases:
inputs
=
inputs_dict
[
'input_ids'
]
# Let's keep only input_ids
inputs
=
inputs_dict
[
'input_ids'
]
# Let's keep only input_ids
try
:
try
:
torch
.
jit
.
trace
(
model
,
inputs
)
traced_gpt2
=
torch
.
jit
.
trace
(
model
,
inputs
)
except
RuntimeError
:
except
RuntimeError
:
self
.
fail
(
"Couldn't trace module."
)
self
.
fail
(
"Couldn't trace module."
)
try
:
with
TemporaryDirectory
()
as
tmp_dir_name
:
traced_gpt2
=
torch
.
jit
.
trace
(
model
,
inputs
)
pt_file_name
=
os
.
path
.
join
(
tmp_dir_name
,
"traced_model.pt"
)
torch
.
jit
.
save
(
traced_gpt2
,
"traced_model.pt"
)
except
RuntimeError
:
self
.
fail
(
"Couldn't save module."
)
try
:
try
:
loaded_model
=
torch
.
jit
.
load
(
"traced_model.pt"
)
torch
.
jit
.
save
(
traced_gpt2
,
pt_file_name
)
os
.
remove
(
"traced_model.pt"
)
except
Exception
:
except
ValueError
:
self
.
fail
(
"Couldn't save module."
)
self
.
fail
(
"Couldn't load module."
)
try
:
loaded_model
=
torch
.
jit
.
load
(
pt_file_name
)
except
Exception
:
self
.
fail
(
"Couldn't load module."
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
...
@@ -223,7 +251,6 @@ class CommonTestCases:
...
@@ -223,7 +251,6 @@ class CommonTestCases:
self
.
assertTrue
(
models_equal
)
self
.
assertTrue
(
models_equal
)
def
test_headmasking
(
self
):
def
test_headmasking
(
self
):
if
not
self
.
test_head_masking
:
if
not
self
.
test_head_masking
:
return
return
...
@@ -278,7 +305,6 @@ class CommonTestCases:
...
@@ -278,7 +305,6 @@ class CommonTestCases:
self
.
assertNotEqual
(
self
.
assertNotEqual
(
attentions
[
-
1
][...,
-
1
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
attentions
[
-
1
][...,
-
1
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
def
test_head_pruning
(
self
):
def
test_head_pruning
(
self
):
if
not
self
.
test_pruning
:
if
not
self
.
test_pruning
:
return
return
...
@@ -297,7 +323,8 @@ class CommonTestCases:
...
@@ -297,7 +323,8 @@ class CommonTestCases:
heads_to_prune
=
{
0
:
list
(
range
(
1
,
self
.
model_tester
.
num_attention_heads
)),
heads_to_prune
=
{
0
:
list
(
range
(
1
,
self
.
model_tester
.
num_attention_heads
)),
-
1
:
[
0
]}
-
1
:
[
0
]}
model
.
prune_heads
(
heads_to_prune
)
model
.
prune_heads
(
heads_to_prune
)
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
attentions
=
outputs
[
-
1
]
...
@@ -326,20 +353,19 @@ class CommonTestCases:
...
@@ -326,20 +353,19 @@ class CommonTestCases:
heads_to_prune
=
{
0
:
list
(
range
(
1
,
self
.
model_tester
.
num_attention_heads
)),
heads_to_prune
=
{
0
:
list
(
range
(
1
,
self
.
model_tester
.
num_attention_heads
)),
-
1
:
[
0
]}
-
1
:
[
0
]}
model
.
prune_heads
(
heads_to_prune
)
model
.
prune_heads
(
heads_to_prune
)
directory
=
"pruned_model"
if
not
os
.
path
.
exists
(
directory
):
os
.
makedirs
(
directory
)
model
.
save_pretrained
(
directory
)
model
=
model_class
.
from_pretrained
(
directory
)
model
.
to
(
torch_device
)
outputs
=
model
(
**
inputs_dict
)
with
TemporaryDirectory
()
as
temp_dir_name
:
model
.
save_pretrained
(
temp_dir_name
)
model
=
model_class
.
from_pretrained
(
temp_dir_name
)
model
.
to
(
torch_device
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
1
)
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
1
)
self
.
assertEqual
(
attentions
[
1
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
self
.
assertEqual
(
attentions
[
1
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
self
.
assertEqual
(
attentions
[
-
1
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
self
.
assertEqual
(
attentions
[
-
1
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
shutil
.
rmtree
(
directory
)
def
test_head_pruning_save_load_from_config_init
(
self
):
def
test_head_pruning_save_load_from_config_init
(
self
):
if
not
self
.
test_pruning
:
if
not
self
.
test_pruning
:
...
@@ -362,7 +388,8 @@ class CommonTestCases:
...
@@ -362,7 +388,8 @@ class CommonTestCases:
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
1
)
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
1
)
...
@@ -389,7 +416,8 @@ class CommonTestCases:
...
@@ -389,7 +416,8 @@ class CommonTestCases:
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
...
@@ -397,16 +425,13 @@ class CommonTestCases:
...
@@ -397,16 +425,13 @@ class CommonTestCases:
self
.
assertEqual
(
attentions
[
2
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
self
.
assertEqual
(
attentions
[
2
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
self
.
assertEqual
(
attentions
[
3
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
self
.
assertEqual
(
attentions
[
3
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
directory
=
"pruned_model"
with
TemporaryDirectory
()
as
temp_dir_name
:
model
.
save_pretrained
(
temp_dir_name
)
if
not
os
.
path
.
exists
(
directory
):
model
=
model_class
.
from_pretrained
(
temp_dir_name
)
os
.
makedirs
(
directory
)
model
.
to
(
torch_device
)
model
.
save_pretrained
(
directory
)
model
=
model_class
.
from_pretrained
(
directory
)
model
.
to
(
torch_device
)
shutil
.
rmtree
(
directory
)
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
...
@@ -417,7 +442,8 @@ class CommonTestCases:
...
@@ -417,7 +442,8 @@ class CommonTestCases:
heads_to_prune
=
{
0
:
[
0
],
2
:
[
1
,
2
]}
heads_to_prune
=
{
0
:
[
0
],
2
:
[
1
,
2
]}
model
.
prune_heads
(
heads_to_prune
)
model
.
prune_heads
(
heads_to_prune
)
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
...
@@ -427,7 +453,6 @@ class CommonTestCases:
...
@@ -427,7 +453,6 @@ class CommonTestCases:
self
.
assertDictEqual
(
model
.
config
.
pruned_heads
,
{
0
:
[
0
],
1
:
[
1
,
2
],
2
:
[
1
,
2
]})
self
.
assertDictEqual
(
model
.
config
.
pruned_heads
,
{
0
:
[
0
],
1
:
[
1
,
2
],
2
:
[
1
,
2
]})
def
test_hidden_states_output
(
self
):
def
test_hidden_states_output
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
@@ -437,14 +462,16 @@ class CommonTestCases:
...
@@ -437,14 +462,16 @@ class CommonTestCases:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
hidden_states
=
outputs
[
-
1
]
hidden_states
=
outputs
[
-
1
]
self
.
assertEqual
(
model
.
config
.
output_attentions
,
False
)
self
.
assertEqual
(
model
.
config
.
output_attentions
,
False
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
self
.
assertEqual
(
len
(
hidden_states
),
self
.
model_tester
.
num_hidden_layers
+
1
)
self
.
assertEqual
(
len
(
hidden_states
),
self
.
model_tester
.
num_hidden_layers
+
1
)
self
.
assertListEqual
(
self
.
assertListEqual
(
list
(
hidden_states
[
0
].
shape
[
-
2
:]),
list
(
hidden_states
[
0
].
shape
[
-
2
:]),
[
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
hidden_size
])
[
self
.
model_tester
.
encoder_seq_length
if
hasattr
(
self
.
model_tester
,
'encoder_seq_length'
)
else
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
hidden_size
])
def
test_resize_tokens_embeddings
(
self
):
def
test_resize_tokens_embeddings
(
self
):
original_config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
original_config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
@@ -550,8 +577,14 @@ class CommonTestCases:
...
@@ -550,8 +577,14 @@ class CommonTestCases:
def
test_inputs_embeds
(
self
):
def
test_inputs_embeds
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
input_ids
=
inputs_dict
[
"input_ids"
]
if
not
self
.
is_encoder_decoder
:
del
inputs_dict
[
"input_ids"
]
input_ids
=
inputs_dict
[
"input_ids"
]
del
inputs_dict
[
"input_ids"
]
else
:
encoder_input_ids
=
inputs_dict
[
"encoder_input_ids"
]
decoder_input_ids
=
inputs_dict
[
"decoder_input_ids"
]
del
inputs_dict
[
"encoder_input_ids"
]
del
inputs_dict
[
"decoder_input_ids"
]
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
...
@@ -559,9 +592,14 @@ class CommonTestCases:
...
@@ -559,9 +592,14 @@ class CommonTestCases:
model
.
eval
()
model
.
eval
()
wte
=
model
.
get_input_embeddings
()
wte
=
model
.
get_input_embeddings
()
inputs_dict
[
"inputs_embeds"
]
=
wte
(
input_ids
)
if
not
self
.
is_encoder_decoder
:
outputs
=
model
(
**
inputs_dict
)
inputs_dict
[
"inputs_embeds"
]
=
wte
(
input_ids
)
else
:
inputs_dict
[
"encoder_inputs_embeds"
]
=
wte
(
encoder_input_ids
)
inputs_dict
[
"decoder_inputs_embeds"
]
=
wte
(
decoder_input_ids
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
class
GPTModelTester
(
CommonModelTester
):
class
GPTModelTester
(
CommonModelTester
):
...
@@ -633,7 +671,7 @@ class CommonTestCases:
...
@@ -633,7 +671,7 @@ class CommonTestCases:
mc_token_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
],
self
.
seq_length
)
mc_token_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
],
self
.
seq_length
)
config
=
self
.
config_class
(
config
=
self
.
config_class
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_positions
=
self
.
n_positions
,
n_positions
=
self
.
n_positions
,
n_embd
=
self
.
hidden_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_layer
=
self
.
num_hidden_layers
,
...
@@ -649,9 +687,10 @@ class CommonTestCases:
...
@@ -649,9 +687,10 @@ class CommonTestCases:
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
)
with
torch
.
no_grad
():
outputs
=
model
(
input_ids
,
position_ids
)
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
)
outputs
=
model
(
input_ids
)
outputs
=
model
(
input_ids
,
position_ids
)
outputs
=
model
(
input_ids
)
hidden_state
=
outputs
[
0
]
hidden_state
=
outputs
[
0
]
self
.
parent
.
assertListEqual
(
self
.
parent
.
assertListEqual
(
...
@@ -664,7 +703,8 @@ class CommonTestCases:
...
@@ -664,7 +703,8 @@ class CommonTestCases:
model
=
self
.
lm_head_model_class
(
config
)
model
=
self
.
lm_head_model_class
(
config
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
,
lm_labels
)
with
torch
.
no_grad
():
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
,
lm_labels
)
loss
,
lm_logits
=
outputs
[:
2
]
loss
,
lm_logits
=
outputs
[:
2
]
total_voc
=
self
.
vocab_size
total_voc
=
self
.
vocab_size
...
@@ -681,7 +721,8 @@ class CommonTestCases:
...
@@ -681,7 +721,8 @@ class CommonTestCases:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
outputs
=
model
(
input_ids
)
with
torch
.
no_grad
():
outputs
=
model
(
input_ids
)
presents
=
outputs
[
-
1
]
presents
=
outputs
[
-
1
]
self
.
parent
.
assertEqual
(
self
.
num_hidden_layers
,
len
(
presents
))
self
.
parent
.
assertEqual
(
self
.
num_hidden_layers
,
len
(
presents
))
self
.
parent
.
assertListEqual
(
self
.
parent
.
assertListEqual
(
...
@@ -694,7 +735,8 @@ class CommonTestCases:
...
@@ -694,7 +735,8 @@ class CommonTestCases:
model
=
self
.
double_head_model_class
(
config
)
model
=
self
.
double_head_model_class
(
config
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
outputs
=
model
(
input_ids
,
mc_token_ids
,
lm_labels
=
lm_labels
,
mc_labels
=
mc_labels
,
with
torch
.
no_grad
():
outputs
=
model
(
input_ids
,
mc_token_ids
,
lm_labels
=
lm_labels
,
mc_labels
=
mc_labels
,
token_type_ids
=
token_type_ids
,
position_ids
=
position_ids
)
token_type_ids
=
token_type_ids
,
position_ids
=
position_ids
)
lm_loss
,
mc_loss
,
lm_logits
,
mc_logits
=
outputs
[:
4
]
lm_loss
,
mc_loss
,
lm_logits
,
mc_logits
=
outputs
[:
4
]
loss
=
[
lm_loss
,
mc_loss
]
loss
=
[
lm_loss
,
mc_loss
]
...
@@ -711,10 +753,8 @@ class CommonTestCases:
...
@@ -711,10 +753,8 @@ class CommonTestCases:
[[],
[]])
[[],
[]])
def
create_and_check_model_from_pretrained
(
self
):
def
create_and_check_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
self
.
base_model_class
.
pretrained_model_archive_map
.
keys
())[:
1
]:
for
model_name
in
list
(
self
.
base_model_class
.
pretrained_model_archive_map
.
keys
())[:
1
]:
model
=
self
.
base_model_class
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
self
.
base_model_class
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
parent
.
assertIsNotNone
(
model
)
self
.
parent
.
assertIsNotNone
(
model
)
def
prepare_config_and_inputs_for_common
(
self
):
def
prepare_config_and_inputs_for_common
(
self
):
...
...
transformers/tests/modeling_ctrl_test.py
View file @
1ab25c49
...
@@ -16,7 +16,6 @@ from __future__ import division
...
@@ -16,7 +16,6 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
import
pdb
import
pdb
from
transformers
import
is_torch_available
from
transformers
import
is_torch_available
...
@@ -27,7 +26,7 @@ if is_torch_available():
...
@@ -27,7 +26,7 @@ if is_torch_available():
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
@
require_torch
...
@@ -114,7 +113,7 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
...
@@ -114,7 +113,7 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
CTRLConfig
(
config
=
CTRLConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
n_head
=
self
.
num_attention_heads
,
...
@@ -205,10 +204,8 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
...
@@ -205,10 +204,8 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
CTRLModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
CTRLModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_distilbert_test.py
View file @
1ab25c49
...
@@ -27,7 +27,7 @@ if is_torch_available():
...
@@ -27,7 +27,7 @@ if is_torch_available():
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
@
require_torch
...
@@ -105,7 +105,7 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
...
@@ -105,7 +105,7 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
DistilBertConfig
(
config
=
DistilBertConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
dim
=
self
.
hidden_size
,
dim
=
self
.
hidden_size
,
n_layers
=
self
.
num_hidden_layers
,
n_layers
=
self
.
num_hidden_layers
,
n_heads
=
self
.
num_attention_heads
,
n_heads
=
self
.
num_attention_heads
,
...
@@ -235,10 +235,8 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
...
@@ -235,10 +235,8 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
# @slow
# @slow
# def test_model_from_pretrained(self):
# def test_model_from_pretrained(self):
# cache_dir = "/tmp/transformers_test/"
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# model = DistilBertModel.from_pretrained(model_name, cache_dir=cache_dir)
# model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
# shutil.rmtree(cache_dir)
# self.assertIsNotNone(model)
# self.assertIsNotNone(model)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_gpt2_test.py
View file @
1ab25c49
...
@@ -17,7 +17,6 @@ from __future__ import division
...
@@ -17,7 +17,6 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
from
transformers
import
is_torch_available
from
transformers
import
is_torch_available
...
@@ -27,7 +26,7 @@ if is_torch_available():
...
@@ -27,7 +26,7 @@ if is_torch_available():
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
@
require_torch
...
@@ -110,7 +109,7 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
...
@@ -110,7 +109,7 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
GPT2Config
(
config
=
GPT2Config
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
n_head
=
self
.
num_attention_heads
,
...
@@ -239,10 +238,8 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
...
@@ -239,10 +238,8 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
GPT2Model
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
GPT2Model
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_openai_test.py
View file @
1ab25c49
...
@@ -17,7 +17,6 @@ from __future__ import division
...
@@ -17,7 +17,6 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
from
transformers
import
is_torch_available
from
transformers
import
is_torch_available
...
@@ -27,7 +26,7 @@ if is_torch_available():
...
@@ -27,7 +26,7 @@ if is_torch_available():
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
@
require_torch
...
@@ -98,7 +97,7 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
...
@@ -98,7 +97,7 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
OpenAIGPTConfig
(
config
=
OpenAIGPTConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
n_head
=
self
.
num_attention_heads
,
...
@@ -207,10 +206,8 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
...
@@ -207,10 +206,8 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
OpenAIGPTModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
OpenAIGPTModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_roberta_test.py
View file @
1ab25c49
...
@@ -17,7 +17,6 @@ from __future__ import division
...
@@ -17,7 +17,6 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
from
transformers
import
is_torch_available
from
transformers
import
is_torch_available
...
@@ -25,11 +24,12 @@ if is_torch_available():
...
@@ -25,11 +24,12 @@ if is_torch_available():
import
torch
import
torch
from
transformers
import
(
RobertaConfig
,
RobertaModel
,
RobertaForMaskedLM
,
from
transformers
import
(
RobertaConfig
,
RobertaModel
,
RobertaForMaskedLM
,
RobertaForSequenceClassification
,
RobertaForTokenClassification
)
RobertaForSequenceClassification
,
RobertaForTokenClassification
)
from
transformers.modeling_roberta
import
RobertaEmbeddings
from
transformers.modeling_roberta
import
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
from
transformers.modeling_roberta
import
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
@
require_torch
...
@@ -106,7 +106,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
...
@@ -106,7 +106,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
RobertaConfig
(
config
=
RobertaConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
hidden_size
=
self
.
hidden_size
,
hidden_size
=
self
.
hidden_size
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_attention_heads
=
self
.
num_attention_heads
,
num_attention_heads
=
self
.
num_attention_heads
,
...
@@ -199,12 +199,61 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
...
@@ -199,12 +199,61 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
RobertaModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
RobertaModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
def test_create_position_ids_respects_padding_index(self):
    """ Ensure that default position ids are sequential only for non-padding tokens.

    This is a regression test for https://github.com/huggingface/transformers/issues/1761

    Position ids are masked with the embedding object's padding index, so the first
    non-padding position is RobertaEmbeddings.padding_idx + 1 and a padding token
    keeps the padding index itself as its position.
    """
    config = self.model_tester.prepare_config_and_inputs()[0]
    embeddings = RobertaEmbeddings(config=config)
    pad = embeddings.padding_idx
    first_position = pad + 1

    # Three real tokens followed by a single padding token.
    token_ids = torch.as_tensor([[12, 31, 13, pad]])
    expected = torch.as_tensor([[
        0 + first_position,
        1 + first_position,
        2 + first_position,
        pad,
    ]])

    actual = embeddings.create_position_ids_from_input_ids(token_ids)

    self.assertEqual(actual.shape, expected.shape)
    self.assertTrue(torch.all(torch.eq(actual, expected)))
def test_create_position_ids_from_inputs_embeds(self):
    """ Ensure position ids built from raw embeddings are sequential for every row.

    This is a regression test for https://github.com/huggingface/transformers/issues/1761

    The expected ids start at RobertaEmbeddings.padding_idx + 1 and increase by one
    per position, identically for each sequence in the batch.
    """
    config = self.model_tester.prepare_config_and_inputs()[0]
    embeddings = RobertaEmbeddings(config=config)

    # Only the shape of this tensor is compared below; its values are uninitialized.
    inputs_embeds = torch.Tensor(2, 4, 30)

    first_position = embeddings.padding_idx + 1
    row = [offset + first_position for offset in range(4)]
    expected = torch.as_tensor([row, row])

    actual = embeddings.create_position_ids_from_inputs_embeds(inputs_embeds)

    self.assertEqual(actual.shape, expected.shape)
    self.assertTrue(torch.all(torch.eq(actual, expected)))
class
RobertaModelIntegrationTest
(
unittest
.
TestCase
):
class
RobertaModelIntegrationTest
(
unittest
.
TestCase
):
...
...
transformers/tests/modeling_t5_test.py
0 → 100644
View file @
1ab25c49
# coding=utf-8
# Copyright 2018 Google T5 Authors and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
unittest
from
transformers
import
is_torch_available
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
,
floats_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
if
is_torch_available
():
from
transformers
import
(
T5Config
,
T5Model
,
T5WithLMHeadModel
)
from
transformers.modeling_t5
import
T5_PRETRAINED_MODEL_ARCHIVE_MAP
@require_torch
class T5ModelTest(CommonTestCases.CommonModelTester):
    """Tests for the PyTorch T5 models: the bare model and the LM-head variant.

    The class-level switches below are presumably read by the shared
    ``CommonModelTester`` base class (not visible here) to select which
    generic checks run for this encoder-decoder architecture.
    """

    if is_torch_available():
        all_model_classes = (T5Model, T5WithLMHeadModel)
    else:
        all_model_classes = ()

    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False
    is_encoder_decoder = True

    class T5ModelTester(object):
        """Builds a small T5 configuration plus random inputs and checks output shapes."""

        def __init__(self,
                     parent,
                     batch_size=13,
                     encoder_seq_length=7,
                     decoder_seq_length=9,
                     is_training=True,
                     use_attention_mask=True,
                     use_labels=True,
                     vocab_size=99,
                     n_positions=14,
                     hidden_size=32,
                     num_hidden_layers=5,
                     num_attention_heads=4,
                     d_ff=37,
                     relative_attention_num_buckets=8,
                     dropout_rate=0.1,
                     initializer_factor=0.002,
                     scope=None,
                     ):
            """Store the owning test case (``parent``) and every hyper-parameter as an attribute."""
            # The test case whose assert* methods are used by the checks.
            self.parent = parent

            # Input geometry.
            self.batch_size = batch_size
            self.encoder_seq_length = encoder_seq_length
            self.decoder_seq_length = decoder_seq_length

            # Which optional inputs to generate.
            self.is_training = is_training
            self.use_attention_mask = use_attention_mask
            self.use_labels = use_labels

            # Model hyper-parameters forwarded to T5Config.
            self.vocab_size = vocab_size
            self.n_positions = n_positions
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.d_ff = d_ff
            self.relative_attention_num_buckets = relative_attention_num_buckets
            self.dropout_rate = dropout_rate
            self.initializer_factor = initializer_factor

            self.scope = scope

        def prepare_config_and_inputs(self):
            """Return ``(config, encoder ids, decoder ids, encoder mask, decoder mask, LM labels)``.

            Masks are ``None`` when ``use_attention_mask`` is false and labels
            are ``None`` when ``use_labels`` is false.
            """
            # NOTE: the ids_tensor calls are kept in their original order.
            encoder_input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
            decoder_input_ids = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

            encoder_attention_mask = decoder_attention_mask = None
            if self.use_attention_mask:
                # A vocabulary of size 2 yields random 0/1 masks.
                encoder_attention_mask = ids_tensor([self.batch_size, self.encoder_seq_length], vocab_size=2)
                decoder_attention_mask = ids_tensor([self.batch_size, self.decoder_seq_length], vocab_size=2)

            if self.use_labels:
                decoder_lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)
            else:
                decoder_lm_labels = None

            # Per-head key/value size so that heads * d_kv stays within d_model.
            per_head_size = self.hidden_size // self.num_attention_heads
            config = T5Config(
                vocab_size=self.vocab_size,
                n_positions=self.n_positions,
                d_model=self.hidden_size,
                d_ff=self.d_ff,
                d_kv=per_head_size,
                num_layers=self.num_hidden_layers,
                num_heads=self.num_attention_heads,
                relative_attention_num_buckets=self.relative_attention_num_buckets,
                dropout_rate=self.dropout_rate,
                initializer_factor=self.initializer_factor)

            return (config,
                    encoder_input_ids,
                    decoder_input_ids,
                    encoder_attention_mask,
                    decoder_attention_mask,
                    decoder_lm_labels)

        def check_loss_output(self, result):
            """Assert that ``result["loss"]`` is a zero-dimensional (scalar) tensor."""
            loss_shape = list(result["loss"].size())
            self.parent.assertListEqual(loss_shape, [])

        def create_and_check_t5_model(self, config, encoder_input_ids, decoder_input_ids,
                                      encoder_attention_mask, decoder_attention_mask, decoder_lm_labels):
            """Run the bare T5 model with and without masks and check both output shapes."""
            t5 = T5Model(config=config)
            t5.eval()

            ids_only = {
                "encoder_input_ids": encoder_input_ids,
                "decoder_input_ids": decoder_input_ids,
            }
            ids_and_masks = dict(ids_only)
            ids_and_masks["encoder_attention_mask"] = encoder_attention_mask
            ids_and_masks["decoder_attention_mask"] = decoder_attention_mask

            # The masked call only has to succeed; the outputs that get
            # shape-checked come from the second, mask-free call.
            decoder_output, encoder_output = t5(**ids_and_masks)
            decoder_output, encoder_output = t5(**ids_only)

            self.parent.assertListEqual(
                list(encoder_output.size()),
                [self.batch_size, self.encoder_seq_length, self.hidden_size])
            self.parent.assertListEqual(
                list(decoder_output.size()),
                [self.batch_size, self.decoder_seq_length, self.hidden_size])

        def create_and_check_t5_with_lm_head(self, config, encoder_input_ids, decoder_input_ids,
                                             encoder_attention_mask, decoder_attention_mask, decoder_lm_labels):
            """Run the LM-head model with labels and check the logits shape and the scalar loss."""
            lm_model = T5WithLMHeadModel(config=config)
            lm_model.eval()

            outputs = lm_model(encoder_input_ids=encoder_input_ids,
                               decoder_input_ids=decoder_input_ids,
                               decoder_attention_mask=decoder_attention_mask,
                               decoder_lm_labels=decoder_lm_labels)

            # With labels supplied, the loss comes first and the logits second.
            result = {
                "loss": outputs[0],
                "prediction_scores": outputs[1],
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()),
                [self.batch_size, self.decoder_seq_length, self.vocab_size])
            self.check_loss_output(result)

        def prepare_config_and_inputs_for_common(self):
            """Return ``(config, inputs_dict)`` for the shared tests; the LM labels are dropped."""
            (config,
             encoder_ids,
             decoder_ids,
             encoder_mask,
             decoder_mask,
             _unused_labels) = self.prepare_config_and_inputs()

            inputs_dict = {
                'encoder_input_ids': encoder_ids,
                'decoder_input_ids': decoder_ids,
                'decoder_attention_mask': decoder_mask,
                'encoder_attention_mask': encoder_mask,
            }
            return config, inputs_dict

    def setUp(self):
        """Create the model tester and the config tester used by the tests below."""
        self.model_tester = T5ModelTest.T5ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)

    def test_config(self):
        """Run the shared configuration checks against T5Config."""
        self.config_tester.run_common_tests()

    def test_t5_model(self):
        """Check the output shapes of the bare T5 model."""
        prepared = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_model(*prepared)

    def test_with_lm_head(self):
        """Check the loss and logits shapes of the T5 model with an LM head."""
        prepared = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_with_lm_head(*prepared)

    @slow
    def test_model_from_pretrained(self):
        """Load the first listed pretrained T5 checkpoint and make sure a model is returned."""
        first_checkpoint_only = list(T5_PRETRAINED_MODEL_ARCHIVE_MAP)[:1]
        for model_name in first_checkpoint_only:
            model = T5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
transformers/tests/modeling_tf_albert_test.py
View file @
1ab25c49
...
@@ -17,12 +17,11 @@ from __future__ import division
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
import
sys
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
AlbertConfig
,
is_tf_available
from
transformers
import
AlbertConfig
,
is_tf_available
...
@@ -118,7 +117,7 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -118,7 +117,7 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
AlbertConfig
(
config
=
AlbertConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
hidden_size
=
self
.
hidden_size
,
hidden_size
=
self
.
hidden_size
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_attention_heads
=
self
.
num_attention_heads
,
num_attention_heads
=
self
.
num_attention_heads
,
...
@@ -217,12 +216,8 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -217,12 +216,8 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
# for model_name in list(TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model
=
TFAlbertModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
for
model_name
in
[
'albert-base-uncased'
]:
model
=
TFAlbertModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_tf_auto_test.py
View file @
1ab25c49
...
@@ -22,7 +22,7 @@ import logging
...
@@ -22,7 +22,7 @@ import logging
from
transformers
import
is_tf_available
from
transformers
import
is_tf_available
from
.utils
import
require_tf
,
slow
from
.utils
import
require_tf
,
slow
,
SMALL_MODEL_IDENTIFIER
if
is_tf_available
():
if
is_tf_available
():
from
transformers
import
(
AutoConfig
,
BertConfig
,
from
transformers
import
(
AutoConfig
,
BertConfig
,
...
@@ -46,11 +46,11 @@ class TFAutoModelTest(unittest.TestCase):
...
@@ -46,11 +46,11 @@ class TFAutoModelTest(unittest.TestCase):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logging
.
basicConfig
(
level
=
logging
.
INFO
)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
for
model_name
in
[
'bert-base-uncased'
]:
config
=
AutoConfig
.
from_pretrained
(
model_name
,
force_download
=
True
)
config
=
AutoConfig
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsInstance
(
config
,
BertConfig
)
self
.
assertIsInstance
(
config
,
BertConfig
)
model
=
TFAutoModel
.
from_pretrained
(
model_name
,
force_download
=
True
)
model
=
TFAutoModel
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsInstance
(
model
,
TFBertModel
)
self
.
assertIsInstance
(
model
,
TFBertModel
)
...
@@ -59,11 +59,11 @@ class TFAutoModelTest(unittest.TestCase):
...
@@ -59,11 +59,11 @@ class TFAutoModelTest(unittest.TestCase):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logging
.
basicConfig
(
level
=
logging
.
INFO
)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
for
model_name
in
[
'bert-base-uncased'
]:
config
=
AutoConfig
.
from_pretrained
(
model_name
,
force_download
=
True
)
config
=
AutoConfig
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsInstance
(
config
,
BertConfig
)
self
.
assertIsInstance
(
config
,
BertConfig
)
model
=
TFAutoModelWithLMHead
.
from_pretrained
(
model_name
,
force_download
=
True
)
model
=
TFAutoModelWithLMHead
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsInstance
(
model
,
TFBertForMaskedLM
)
self
.
assertIsInstance
(
model
,
TFBertForMaskedLM
)
...
@@ -72,11 +72,11 @@ class TFAutoModelTest(unittest.TestCase):
...
@@ -72,11 +72,11 @@ class TFAutoModelTest(unittest.TestCase):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logging
.
basicConfig
(
level
=
logging
.
INFO
)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
for
model_name
in
[
'bert-base-uncased'
]:
config
=
AutoConfig
.
from_pretrained
(
model_name
,
force_download
=
True
)
config
=
AutoConfig
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsInstance
(
config
,
BertConfig
)
self
.
assertIsInstance
(
config
,
BertConfig
)
model
=
TFAutoModelForSequenceClassification
.
from_pretrained
(
model_name
,
force_download
=
True
)
model
=
TFAutoModelForSequenceClassification
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsInstance
(
model
,
TFBertForSequenceClassification
)
self
.
assertIsInstance
(
model
,
TFBertForSequenceClassification
)
...
@@ -85,14 +85,19 @@ class TFAutoModelTest(unittest.TestCase):
...
@@ -85,14 +85,19 @@ class TFAutoModelTest(unittest.TestCase):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
logging
.
basicConfig
(
level
=
logging
.
INFO
)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
for
model_name
in
[
'bert-base-uncased'
]:
config
=
AutoConfig
.
from_pretrained
(
model_name
,
force_download
=
True
)
config
=
AutoConfig
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsInstance
(
config
,
BertConfig
)
self
.
assertIsInstance
(
config
,
BertConfig
)
model
=
TFAutoModelForQuestionAnswering
.
from_pretrained
(
model_name
,
force_download
=
True
)
model
=
TFAutoModelForQuestionAnswering
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsInstance
(
model
,
TFBertForQuestionAnswering
)
self
.
assertIsInstance
(
model
,
TFBertForQuestionAnswering
)
def test_from_pretrained_identifier(self):
    """Load SMALL_MODEL_IDENTIFIER through TFAutoModelWithLMHead and check the resolved class."""
    logging.basicConfig(level=logging.INFO)
    model = TFAutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER)
    # The auto class is expected to resolve this identifier to the BERT masked-LM model.
    self.assertIsInstance(model, TFBertForMaskedLM)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
transformers/tests/modeling_tf_bert_test.py
View file @
1ab25c49
...
@@ -17,12 +17,11 @@ from __future__ import division
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
import
sys
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
BertConfig
,
is_tf_available
from
transformers
import
BertConfig
,
is_tf_available
...
@@ -114,7 +113,7 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -114,7 +113,7 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
BertConfig
(
config
=
BertConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
hidden_size
=
self
.
hidden_size
,
hidden_size
=
self
.
hidden_size
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_attention_heads
=
self
.
num_attention_heads
,
num_attention_heads
=
self
.
num_attention_heads
,
...
@@ -310,11 +309,9 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -310,11 +309,9 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
for
model_name
in
[
'bert-base-uncased'
]:
model
=
TFBertModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
TFBertModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_common_test.py
View file @
1ab25c49
...
@@ -69,6 +69,7 @@ class TFCommonTestCases:
...
@@ -69,6 +69,7 @@ class TFCommonTestCases:
test_torchscript
=
True
test_torchscript
=
True
test_pruning
=
True
test_pruning
=
True
test_resize_embeddings
=
True
test_resize_embeddings
=
True
is_encoder_decoder
=
False
def
test_initialization
(
self
):
def
test_initialization
(
self
):
pass
pass
...
@@ -129,8 +130,12 @@ class TFCommonTestCases:
...
@@ -129,8 +130,12 @@ class TFCommonTestCases:
for
name
,
key
in
inputs_dict
.
items
())
for
name
,
key
in
inputs_dict
.
items
())
with
torch
.
no_grad
():
with
torch
.
no_grad
():
pto
=
pt_model
(
**
pt_inputs_dict
)
pto
=
pt_model
(
**
pt_inputs_dict
)
tfo
=
tf_model
(
inputs_dict
)
tfo
=
tf_model
(
inputs_dict
,
training
=
False
)
max_diff
=
np
.
amax
(
np
.
abs
(
tfo
[
0
].
numpy
()
-
pto
[
0
].
numpy
()))
tf_hidden_states
=
tfo
[
0
].
numpy
()
pt_hidden_states
=
pto
[
0
].
numpy
()
tf_hidden_states
[
np
.
isnan
(
tf_hidden_states
)]
=
0
pt_hidden_states
[
np
.
isnan
(
pt_hidden_states
)]
=
0
max_diff
=
np
.
amax
(
np
.
abs
(
tf_hidden_states
-
pt_hidden_states
))
self
.
assertLessEqual
(
max_diff
,
2e-2
)
self
.
assertLessEqual
(
max_diff
,
2e-2
)
# Check we can load pt model in tf and vice-versa with checkpoint => model functions
# Check we can load pt model in tf and vice-versa with checkpoint => model functions
...
@@ -150,13 +155,21 @@ class TFCommonTestCases:
...
@@ -150,13 +155,21 @@ class TFCommonTestCases:
with
torch
.
no_grad
():
with
torch
.
no_grad
():
pto
=
pt_model
(
**
pt_inputs_dict
)
pto
=
pt_model
(
**
pt_inputs_dict
)
tfo
=
tf_model
(
inputs_dict
)
tfo
=
tf_model
(
inputs_dict
)
max_diff
=
np
.
amax
(
np
.
abs
(
tfo
[
0
].
numpy
()
-
pto
[
0
].
numpy
()))
tfo
=
tfo
[
0
].
numpy
()
pto
=
pto
[
0
].
numpy
()
tfo
[
np
.
isnan
(
tfo
)]
=
0
pto
[
np
.
isnan
(
pto
)]
=
0
max_diff
=
np
.
amax
(
np
.
abs
(
tfo
-
pto
))
self
.
assertLessEqual
(
max_diff
,
2e-2
)
self
.
assertLessEqual
(
max_diff
,
2e-2
)
def
test_compile_tf_model
(
self
):
def
test_compile_tf_model
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
input_ids
=
tf
.
keras
.
Input
(
batch_shape
=
(
2
,
2000
),
name
=
'input_ids'
,
dtype
=
'int32'
)
if
self
.
is_encoder_decoder
:
input_ids
=
{
'decoder_input_ids'
:
tf
.
keras
.
Input
(
batch_shape
=
(
2
,
2000
),
name
=
'decoder_input_ids'
,
dtype
=
'int32'
),
'encoder_input_ids'
:
tf
.
keras
.
Input
(
batch_shape
=
(
2
,
2000
),
name
=
'encoder_input_ids'
,
dtype
=
'int32'
)}
else
:
input_ids
=
tf
.
keras
.
Input
(
batch_shape
=
(
2
,
2000
),
name
=
'input_ids'
,
dtype
=
'int32'
)
optimizer
=
tf
.
keras
.
optimizers
.
Adam
(
learning_rate
=
3e-5
,
epsilon
=
1e-08
,
clipnorm
=
1.0
)
optimizer
=
tf
.
keras
.
optimizers
.
Adam
(
learning_rate
=
3e-5
,
epsilon
=
1e-08
,
clipnorm
=
1.0
)
loss
=
tf
.
keras
.
losses
.
SparseCategoricalCrossentropy
(
from_logits
=
True
)
loss
=
tf
.
keras
.
losses
.
SparseCategoricalCrossentropy
(
from_logits
=
True
)
metric
=
tf
.
keras
.
metrics
.
SparseCategoricalAccuracy
(
'accuracy'
)
metric
=
tf
.
keras
.
metrics
.
SparseCategoricalAccuracy
(
'accuracy'
)
...
@@ -189,7 +202,7 @@ class TFCommonTestCases:
...
@@ -189,7 +202,7 @@ class TFCommonTestCases:
outputs_dict
=
model
(
inputs_dict
)
outputs_dict
=
model
(
inputs_dict
)
inputs_keywords
=
copy
.
deepcopy
(
inputs_dict
)
inputs_keywords
=
copy
.
deepcopy
(
inputs_dict
)
input_ids
=
inputs_keywords
.
pop
(
'input_ids'
)
input_ids
=
inputs_keywords
.
pop
(
'input_ids'
if
not
self
.
is_encoder_decoder
else
'decoder_input_ids'
,
None
)
outputs_keywords
=
model
(
input_ids
,
**
inputs_keywords
)
outputs_keywords
=
model
(
input_ids
,
**
inputs_keywords
)
output_dict
=
outputs_dict
[
0
].
numpy
()
output_dict
=
outputs_dict
[
0
].
numpy
()
...
@@ -200,6 +213,11 @@ class TFCommonTestCases:
...
@@ -200,6 +213,11 @@ class TFCommonTestCases:
def
test_attention_outputs
(
self
):
def
test_attention_outputs
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
decoder_seq_length
=
self
.
model_tester
.
decoder_seq_length
if
hasattr
(
self
.
model_tester
,
'decoder_seq_length'
)
else
self
.
model_tester
.
seq_length
encoder_seq_length
=
self
.
model_tester
.
encoder_seq_length
if
hasattr
(
self
.
model_tester
,
'encoder_seq_length'
)
else
self
.
model_tester
.
seq_length
decoder_key_length
=
self
.
model_tester
.
key_length
if
hasattr
(
self
.
model_tester
,
'key_length'
)
else
decoder_seq_length
encoder_key_length
=
self
.
model_tester
.
key_length
if
hasattr
(
self
.
model_tester
,
'key_length'
)
else
encoder_seq_length
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
config
.
output_attentions
=
True
config
.
output_attentions
=
True
config
.
output_hidden_states
=
False
config
.
output_hidden_states
=
False
...
@@ -212,16 +230,28 @@ class TFCommonTestCases:
...
@@ -212,16 +230,28 @@ class TFCommonTestCases:
self
.
assertListEqual
(
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
list
(
attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
[
self
.
model_tester
.
num_attention_heads
,
self
.
model_tester
.
seq_length
,
encoder_
seq_length
,
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq
_length
])
encoder_key
_length
])
out_len
=
len
(
outputs
)
out_len
=
len
(
outputs
)
if
self
.
is_encoder_decoder
:
self
.
assertEqual
(
out_len
%
2
,
0
)
decoder_attentions
=
outputs
[(
out_len
//
2
)
-
1
]
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
False
)
self
.
assertEqual
(
len
(
decoder_attentions
),
self
.
model_tester
.
num_hidden_layers
)
self
.
assertListEqual
(
list
(
decoder_attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
decoder_seq_length
,
decoder_key_length
])
# Check attention is always last and order is fine
# Check attention is always last and order is fine
config
.
output_attentions
=
True
config
.
output_attentions
=
True
config
.
output_hidden_states
=
True
config
.
output_hidden_states
=
True
model
=
model_class
(
config
)
model
=
model_class
(
config
)
outputs
=
model
(
inputs_dict
)
outputs
=
model
(
inputs_dict
)
self
.
assertEqual
(
out_len
+
1
,
len
(
outputs
))
self
.
assertEqual
(
out_len
+
(
2
if
self
.
is_encoder_decoder
else
1
)
,
len
(
outputs
))
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
...
@@ -230,8 +260,8 @@ class TFCommonTestCases:
...
@@ -230,8 +260,8 @@ class TFCommonTestCases:
self
.
assertListEqual
(
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
list
(
attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
[
self
.
model_tester
.
num_attention_heads
,
self
.
model_tester
.
seq_length
,
encoder_
seq_length
,
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq
_length
])
encoder_key
_length
])
def
test_hidden_states_output
(
self
):
def
test_hidden_states_output
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
@@ -264,35 +294,53 @@ class TFCommonTestCases:
...
@@ -264,35 +294,53 @@ class TFCommonTestCases:
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
first
,
second
=
model
(
inputs_dict
,
training
=
False
)[
0
],
model
(
inputs_dict
,
training
=
False
)[
0
]
first
,
second
=
model
(
inputs_dict
,
training
=
False
)[
0
],
model
(
inputs_dict
,
training
=
False
)[
0
]
self
.
assertTrue
(
tf
.
math
.
equal
(
first
,
second
).
numpy
().
all
())
out_1
=
first
.
numpy
()
out_2
=
second
.
numpy
()
out_1
=
out_1
[
~
np
.
isnan
(
out_1
)]
out_2
=
out_2
[
~
np
.
isnan
(
out_2
)]
max_diff
=
np
.
amax
(
np
.
abs
(
out_1
-
out_2
))
self
.
assertLessEqual
(
max_diff
,
1e-5
)
def
_get_embeds
(
self
,
wte
,
input_ids
):
# ^^ In our TF models, the input_embeddings can take slightly different forms,
# so we try a few of them.
# We used to fall back to just synthetically creating a dummy tensor of ones:
try
:
x
=
wte
(
input_ids
,
mode
=
"embedding"
)
except
:
try
:
x
=
wte
([
input_ids
],
mode
=
"embedding"
)
except
:
try
:
x
=
wte
([
input_ids
,
None
,
None
,
None
],
mode
=
"embedding"
)
except
:
if
hasattr
(
self
.
model_tester
,
"embedding_size"
):
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
embedding_size
],
dtype
=
tf
.
dtypes
.
float32
)
else
:
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
hidden_size
],
dtype
=
tf
.
dtypes
.
float32
)
return
x
def
test_inputs_embeds
(
self
):
def
test_inputs_embeds
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
input_ids
=
inputs_dict
[
"input_ids"
]
if
not
self
.
is_encoder_decoder
:
del
inputs_dict
[
"input_ids"
]
input_ids
=
inputs_dict
[
"input_ids"
]
del
inputs_dict
[
"input_ids"
]
else
:
encoder_input_ids
=
inputs_dict
[
"encoder_input_ids"
]
decoder_input_ids
=
inputs_dict
[
"decoder_input_ids"
]
del
inputs_dict
[
"encoder_input_ids"
]
del
inputs_dict
[
"decoder_input_ids"
]
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
wte
=
model
.
get_input_embeddings
()
wte
=
model
.
get_input_embeddings
()
try
:
if
not
self
.
is_encoder_decoder
:
x
=
wte
(
input_ids
,
mode
=
"embedding"
)
inputs_dict
[
"inputs_embeds"
]
=
self
.
_get_embeds
(
wte
,
input_ids
)
except
:
else
:
try
:
inputs_dict
[
"encoder_inputs_embeds"
]
=
self
.
_get_embeds
(
wte
,
encoder_input_ids
)
x
=
wte
([
input_ids
],
mode
=
"embedding"
)
inputs_dict
[
"decoder_inputs_embeds"
]
=
self
.
_get_embeds
(
wte
,
decoder_input_ids
)
except
:
try
:
x
=
wte
([
input_ids
,
None
,
None
,
None
],
mode
=
"embedding"
)
except
:
if
hasattr
(
self
.
model_tester
,
"embedding_size"
):
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
embedding_size
],
dtype
=
tf
.
dtypes
.
float32
)
else
:
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
hidden_size
],
dtype
=
tf
.
dtypes
.
float32
)
# ^^ In our TF models, the input_embeddings can take slightly different forms,
# so we try a few of them.
# We used to fall back to just synthetically creating a dummy tensor of ones:
#
inputs_dict
[
"inputs_embeds"
]
=
x
outputs
=
model
(
inputs_dict
)
outputs
=
model
(
inputs_dict
)
...
...
transformers/tests/modeling_tf_ctrl_test.py
View file @
1ab25c49
...
@@ -17,12 +17,11 @@ from __future__ import division
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
import
sys
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
CTRLConfig
,
is_tf_available
from
transformers
import
CTRLConfig
,
is_tf_available
...
@@ -112,7 +111,7 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -112,7 +111,7 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
CTRLConfig
(
config
=
CTRLConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
n_head
=
self
.
num_attention_heads
,
...
@@ -189,10 +188,8 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -189,10 +188,8 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFCTRLModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
TFCTRLModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_distilbert_test.py
View file @
1ab25c49
...
@@ -20,7 +20,7 @@ import unittest
...
@@ -20,7 +20,7 @@ import unittest
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
DistilBertConfig
,
is_tf_available
from
transformers
import
DistilBertConfig
,
is_tf_available
...
@@ -107,7 +107,7 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -107,7 +107,7 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
DistilBertConfig
(
config
=
DistilBertConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
dim
=
self
.
hidden_size
,
dim
=
self
.
hidden_size
,
n_layers
=
self
.
num_hidden_layers
,
n_layers
=
self
.
num_hidden_layers
,
n_heads
=
self
.
num_attention_heads
,
n_heads
=
self
.
num_attention_heads
,
...
@@ -211,10 +211,8 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -211,10 +211,8 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
# @slow
# @slow
# def test_model_from_pretrained(self):
# def test_model_from_pretrained(self):
# cache_dir = "/tmp/transformers_test/"
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# model = DistilBertModel.from_pretrained(model_name, cache_dir=cache_dir)
# model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
# shutil.rmtree(cache_dir)
# self.assertIsNotNone(model)
# self.assertIsNotNone(model)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_gpt2_test.py
View file @
1ab25c49
...
@@ -17,12 +17,11 @@ from __future__ import division
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
import
sys
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
GPT2Config
,
is_tf_available
from
transformers
import
GPT2Config
,
is_tf_available
...
@@ -115,7 +114,7 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -115,7 +114,7 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
GPT2Config
(
config
=
GPT2Config
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
n_head
=
self
.
num_attention_heads
,
...
@@ -220,10 +219,8 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -220,10 +219,8 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFGPT2Model
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
TFGPT2Model
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_openai_gpt_test.py
View file @
1ab25c49
...
@@ -17,12 +17,11 @@ from __future__ import division
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
import
sys
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
OpenAIGPTConfig
,
is_tf_available
from
transformers
import
OpenAIGPTConfig
,
is_tf_available
...
@@ -114,7 +113,7 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -114,7 +113,7 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
OpenAIGPTConfig
(
config
=
OpenAIGPTConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
n_head
=
self
.
num_attention_heads
,
...
@@ -219,10 +218,8 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -219,10 +218,8 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFOpenAIGPTModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
TFOpenAIGPTModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_roberta_test.py
View file @
1ab25c49
...
@@ -17,11 +17,10 @@ from __future__ import division
...
@@ -17,11 +17,10 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
RobertaConfig
,
is_tf_available
from
transformers
import
RobertaConfig
,
is_tf_available
...
@@ -109,7 +108,7 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -109,7 +108,7 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
RobertaConfig
(
config
=
RobertaConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
hidden_size
=
self
.
hidden_size
,
hidden_size
=
self
.
hidden_size
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_attention_heads
=
self
.
num_attention_heads
,
num_attention_heads
=
self
.
num_attention_heads
,
...
@@ -192,10 +191,8 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -192,10 +191,8 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFRobertaModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
TFRobertaModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_tf_t5_test.py
0 → 100644
View file @
1ab25c49
# coding=utf-8
# Copyright 2018 Google T5 Authors and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
unittest
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
T5Config
,
is_tf_available
if
is_tf_available
():
import
tensorflow
as
tf
from
transformers.modeling_tf_t5
import
(
TFT5Model
,
TFT5WithLMHeadModel
,
TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP
)
@require_tf
class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester):
    """Tests for the TensorFlow T5 models (encoder-decoder)."""

    is_encoder_decoder = True
    all_model_classes = (TFT5Model, TFT5WithLMHeadModel) if is_tf_available() else ()

    class TFT5ModelTester(object):
        """Builds a small ``T5Config`` with random inputs and runs output-shape checks."""

        def __init__(self, parent, batch_size=13, seq_length=7, is_training=True,
                     use_input_mask=True, use_labels=True, vocab_size=99,
                     n_positions=14, hidden_size=32, num_hidden_layers=5,
                     num_attention_heads=4, d_ff=37, relative_attention_num_buckets=8,
                     dropout_rate=0.1, initializer_factor=0.002, scope=None):
            # ``parent`` is the test case whose assert* methods are used for checks.
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.n_positions = n_positions
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.d_ff = d_ff
            self.relative_attention_num_buckets = relative_attention_num_buckets
            self.dropout_rate = dropout_rate
            self.initializer_factor = initializer_factor
            self.scope = scope

        @staticmethod
        def _as_model_inputs(input_ids, input_mask):
            # The same ids are fed to both the encoder and the decoder.
            return {'encoder_input_ids': input_ids,
                    'decoder_input_ids': input_ids,
                    'decoder_attention_mask': input_mask}

        def prepare_config_and_inputs(self):
            """Return ``(config, input_ids, input_mask, token_labels)``.

            ``input_mask`` / ``token_labels`` are ``None`` when
            ``use_input_mask`` / ``use_labels`` is false.
            """
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            input_mask = (ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
                          if self.use_input_mask else None)
            token_labels = (ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
                            if self.use_labels else None)

            config_kwargs = dict(
                vocab_size=self.vocab_size,
                n_positions=self.n_positions,
                d_model=self.hidden_size,
                d_ff=self.d_ff,
                d_kv=self.hidden_size // self.num_attention_heads,
                num_layers=self.num_hidden_layers,
                num_heads=self.num_attention_heads,
                relative_attention_num_buckets=self.relative_attention_num_buckets,
                dropout_rate=self.dropout_rate,
                initializer_factor=self.initializer_factor,
            )
            config = T5Config(**config_kwargs)

            return (config, input_ids, input_mask, token_labels)

        def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
            """Run ``TFT5Model`` with both call styles and check the output shapes."""
            model = TFT5Model(config=config)

            # Dict-style call: exercised, and must yield exactly two outputs.
            encoder_output, decoder_output = model(self._as_model_inputs(input_ids, input_mask))

            # Keyword-style call: these are the outputs whose shapes are checked.
            encoder_output, decoder_output = model(
                input_ids,
                decoder_attention_mask=input_mask,
                encoder_input_ids=input_ids,
            )

            encoder_array = encoder_output.numpy()
            decoder_array = decoder_output.numpy()
            expected_shape = [self.batch_size, self.seq_length, self.hidden_size]
            self.parent.assertListEqual(list(encoder_array.shape), expected_shape)
            self.parent.assertListEqual(list(decoder_array.shape), expected_shape)

        def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
            """Run ``TFT5WithLMHeadModel`` and check the prediction-score shape."""
            model = TFT5WithLMHeadModel(config=config)
            prediction_scores, decoder_output = model(self._as_model_inputs(input_ids, input_mask))

            scores_array = prediction_scores.numpy()
            self.parent.assertListEqual(
                list(scores_array.shape),
                [self.batch_size, self.seq_length, self.vocab_size])

        def prepare_config_and_inputs_for_common(self):
            """Return ``(config, inputs_dict)`` in the form used by the common tests."""
            config, input_ids, input_mask, token_labels = self.prepare_config_and_inputs()
            return config, self._as_model_inputs(input_ids, input_mask)

    def setUp(self):
        self.model_tester = TFT5ModelTest.TFT5ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)

    # Common configuration checks.
    def test_config(self):
        self.config_tester.run_common_tests()

    # Base model: encoder/decoder output shapes.
    def test_t5_model(self):
        prepared = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_model(*prepared)

    # LM-head model: prediction-score shape.
    def test_with_lm_head(self):
        prepared = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_with_lm_head(*prepared)

    # Loads pretrained weights into CACHE_DIR; marked slow.
    @slow
    def test_model_from_pretrained(self):
        for model_name in ['t5-small']:
            model = TFT5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
# Run the unittest runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
transformers/tests/modeling_tf_transfo_xl_test.py
View file @
1ab25c49
...
@@ -18,11 +18,10 @@ from __future__ import print_function
...
@@ -18,11 +18,10 @@ from __future__ import print_function
import
unittest
import
unittest
import
random
import
random
import
shutil
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
TransfoXLConfig
,
is_tf_available
from
transformers
import
TransfoXLConfig
,
is_tf_available
...
@@ -67,7 +66,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -67,7 +66,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
self
.
batch_size
=
batch_size
self
.
batch_size
=
batch_size
self
.
seq_length
=
seq_length
self
.
seq_length
=
seq_length
self
.
mem_len
=
mem_len
self
.
mem_len
=
mem_len
self
.
key_len
=
seq_length
+
mem_len
self
.
key_len
gth
=
seq_length
+
mem_len
self
.
clamp_len
=
clamp_len
self
.
clamp_len
=
clamp_len
self
.
is_training
=
is_training
self
.
is_training
=
is_training
self
.
use_labels
=
use_labels
self
.
use_labels
=
use_labels
...
@@ -92,7 +91,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -92,7 +91,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
lm_labels
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
vocab_size
)
lm_labels
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
vocab_size
)
config
=
TransfoXLConfig
(
config
=
TransfoXLConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
mem_len
=
self
.
mem_len
,
mem_len
=
self
.
mem_len
,
clamp_len
=
self
.
clamp_len
,
clamp_len
=
self
.
clamp_len
,
cutoffs
=
self
.
cutoffs
,
cutoffs
=
self
.
cutoffs
,
...
@@ -205,10 +204,8 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -205,10 +204,8 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFTransfoXLModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
TFTransfoXLModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_tf_xlm_test.py
View file @
1ab25c49
...
@@ -17,7 +17,6 @@ from __future__ import division
...
@@ -17,7 +17,6 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
shutil
from
transformers
import
is_tf_available
from
transformers
import
is_tf_available
...
@@ -31,7 +30,7 @@ if is_tf_available():
...
@@ -31,7 +30,7 @@ if is_tf_available():
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
@
require_tf
@
require_tf
...
@@ -125,7 +124,7 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -125,7 +124,7 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
is_impossible_labels
=
ids_tensor
([
self
.
batch_size
],
2
,
dtype
=
tf
.
float32
)
is_impossible_labels
=
ids_tensor
([
self
.
batch_size
],
2
,
dtype
=
tf
.
float32
)
config
=
XLMConfig
(
config
=
XLMConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_special
=
self
.
n_special
,
n_special
=
self
.
n_special
,
emb_dim
=
self
.
hidden_size
,
emb_dim
=
self
.
hidden_size
,
n_layers
=
self
.
num_hidden_layers
,
n_layers
=
self
.
num_hidden_layers
,
...
@@ -252,10 +251,8 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -252,10 +251,8 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
XLMModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
TFXLMModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_tf_xlnet_test.py
View file @
1ab25c49
...
@@ -20,7 +20,6 @@ import os
...
@@ -20,7 +20,6 @@ import os
import
unittest
import
unittest
import
json
import
json
import
random
import
random
import
shutil
from
transformers
import
XLNetConfig
,
is_tf_available
from
transformers
import
XLNetConfig
,
is_tf_available
...
@@ -35,7 +34,7 @@ if is_tf_available():
...
@@ -35,7 +34,7 @@ if is_tf_available():
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
@
require_tf
@
require_tf
...
@@ -64,7 +63,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -64,7 +63,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
num_attention_heads
=
4
,
num_attention_heads
=
4
,
d_inner
=
128
,
d_inner
=
128
,
num_hidden_layers
=
5
,
num_hidden_layers
=
5
,
max_position_embeddings
=
10
,
type_sequence_label_size
=
2
,
type_sequence_label_size
=
2
,
untie_r
=
True
,
untie_r
=
True
,
bi_data
=
False
,
bi_data
=
False
,
...
@@ -88,7 +86,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -88,7 +86,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
self
.
num_attention_heads
=
num_attention_heads
self
.
num_attention_heads
=
num_attention_heads
self
.
d_inner
=
d_inner
self
.
d_inner
=
d_inner
self
.
num_hidden_layers
=
num_hidden_layers
self
.
num_hidden_layers
=
num_hidden_layers
self
.
max_position_embeddings
=
max_position_embeddings
self
.
bi_data
=
bi_data
self
.
bi_data
=
bi_data
self
.
untie_r
=
untie_r
self
.
untie_r
=
untie_r
self
.
same_length
=
same_length
self
.
same_length
=
same_length
...
@@ -122,13 +119,12 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -122,13 +119,12 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
is_impossible_labels
=
ids_tensor
([
self
.
batch_size
],
2
,
dtype
=
tf
.
float32
)
is_impossible_labels
=
ids_tensor
([
self
.
batch_size
],
2
,
dtype
=
tf
.
float32
)
config
=
XLNetConfig
(
config
=
XLNetConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
d_model
=
self
.
hidden_size
,
d_model
=
self
.
hidden_size
,
n_head
=
self
.
num_attention_heads
,
n_head
=
self
.
num_attention_heads
,
d_inner
=
self
.
d_inner
,
d_inner
=
self
.
d_inner
,
n_layer
=
self
.
num_hidden_layers
,
n_layer
=
self
.
num_hidden_layers
,
untie_r
=
self
.
untie_r
,
untie_r
=
self
.
untie_r
,
max_position_embeddings
=
self
.
max_position_embeddings
,
mem_len
=
self
.
mem_len
,
mem_len
=
self
.
mem_len
,
clamp_len
=
self
.
clamp_len
,
clamp_len
=
self
.
clamp_len
,
same_length
=
self
.
same_length
,
same_length
=
self
.
same_length
,
...
@@ -322,10 +318,8 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
...
@@ -322,10 +318,8 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
@
slow
def
test_model_from_pretrained
(
self
):
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
for
model_name
in
list
(
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFXLNetModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
model
=
TFXLNetModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
shutil
.
rmtree
(
cache_dir
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment