Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
1ab25c49
Commit
1ab25c49
authored
Dec 21, 2019
by
thomwolf
Browse files
Merge branch 'master' into pr/2115
parents
df396112
18601c3b
Changes
143
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
653 additions
and
205 deletions
+653
-205
transformers/tests/modeling_common_test.py
transformers/tests/modeling_common_test.py
+110
-70
transformers/tests/modeling_ctrl_test.py
transformers/tests/modeling_ctrl_test.py
+3
-6
transformers/tests/modeling_distilbert_test.py
transformers/tests/modeling_distilbert_test.py
+3
-5
transformers/tests/modeling_gpt2_test.py
transformers/tests/modeling_gpt2_test.py
+3
-6
transformers/tests/modeling_openai_test.py
transformers/tests/modeling_openai_test.py
+3
-6
transformers/tests/modeling_roberta_test.py
transformers/tests/modeling_roberta_test.py
+55
-6
transformers/tests/modeling_t5_test.py
transformers/tests/modeling_t5_test.py
+182
-0
transformers/tests/modeling_tf_albert_test.py
transformers/tests/modeling_tf_albert_test.py
+4
-9
transformers/tests/modeling_tf_auto_test.py
transformers/tests/modeling_tf_auto_test.py
+14
-9
transformers/tests/modeling_tf_bert_test.py
transformers/tests/modeling_tf_bert_test.py
+3
-6
transformers/tests/modeling_tf_common_test.py
transformers/tests/modeling_tf_common_test.py
+79
-31
transformers/tests/modeling_tf_ctrl_test.py
transformers/tests/modeling_tf_ctrl_test.py
+3
-6
transformers/tests/modeling_tf_distilbert_test.py
transformers/tests/modeling_tf_distilbert_test.py
+3
-5
transformers/tests/modeling_tf_gpt2_test.py
transformers/tests/modeling_tf_gpt2_test.py
+3
-6
transformers/tests/modeling_tf_openai_gpt_test.py
transformers/tests/modeling_tf_openai_gpt_test.py
+3
-6
transformers/tests/modeling_tf_roberta_test.py
transformers/tests/modeling_tf_roberta_test.py
+3
-6
transformers/tests/modeling_tf_t5_test.py
transformers/tests/modeling_tf_t5_test.py
+169
-0
transformers/tests/modeling_tf_transfo_xl_test.py
transformers/tests/modeling_tf_transfo_xl_test.py
+4
-7
transformers/tests/modeling_tf_xlm_test.py
transformers/tests/modeling_tf_xlm_test.py
+3
-6
transformers/tests/modeling_tf_xlnet_test.py
transformers/tests/modeling_tf_xlnet_test.py
+3
-9
No files found.
transformers/tests/modeling_common_test.py
View file @
1ab25c49
...
...
@@ -18,7 +18,7 @@ from __future__ import print_function
import
copy
import
sys
import
os
import
os
.path
import
shutil
import
tempfile
import
json
...
...
@@ -30,7 +30,7 @@ import logging
from
transformers
import
is_torch_available
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
if
is_torch_available
():
import
torch
...
...
@@ -58,7 +58,7 @@ else:
def
_config_zero_init
(
config
):
configs_no_init
=
copy
.
deepcopy
(
config
)
for
key
in
configs_no_init
.
__dict__
.
keys
():
if
'_range'
in
key
or
'_std'
in
key
:
if
'_range'
in
key
or
'_std'
in
key
or
'initializer_factor'
in
key
:
setattr
(
configs_no_init
,
key
,
0.0
)
return
configs_no_init
...
...
@@ -73,6 +73,7 @@ class CommonTestCases:
test_pruning
=
True
test_resize_embeddings
=
True
test_head_masking
=
True
is_encoder_decoder
=
False
def
test_save_load
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
@@ -83,6 +84,8 @@ class CommonTestCases:
model
.
eval
()
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
out_2
=
outputs
[
0
].
numpy
()
out_2
[
np
.
isnan
(
out_2
)]
=
0
with
TemporaryDirectory
()
as
tmpdirname
:
model
.
save_pretrained
(
tmpdirname
)
...
...
@@ -93,9 +96,7 @@ class CommonTestCases:
# Make sure we don't have nans
out_1
=
after_outputs
[
0
].
cpu
().
numpy
()
out_2
=
outputs
[
0
].
cpu
().
numpy
()
out_1
=
out_1
[
~
np
.
isnan
(
out_1
)]
out_2
=
out_2
[
~
np
.
isnan
(
out_2
)]
out_1
[
np
.
isnan
(
out_1
)]
=
0
max_diff
=
np
.
amax
(
np
.
abs
(
out_1
-
out_2
))
self
.
assertLessEqual
(
max_diff
,
1e-5
)
...
...
@@ -117,20 +118,32 @@ class CommonTestCases:
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
first
,
second
=
model
(
inputs_dict
[
"input_ids"
])[
0
],
model
(
inputs_dict
[
"input_ids"
])[
0
]
self
.
assertEqual
(
first
.
ne
(
second
).
sum
().
item
(),
0
)
with
torch
.
no_grad
():
first
=
model
(
**
inputs_dict
)[
0
]
second
=
model
(
**
inputs_dict
)[
0
]
out_1
=
first
.
cpu
().
numpy
()
out_2
=
second
.
cpu
().
numpy
()
out_1
=
out_1
[
~
np
.
isnan
(
out_1
)]
out_2
=
out_2
[
~
np
.
isnan
(
out_2
)]
max_diff
=
np
.
amax
(
np
.
abs
(
out_1
-
out_2
))
self
.
assertLessEqual
(
max_diff
,
1e-5
)
def
test_attention_outputs
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
decoder_seq_length
=
self
.
model_tester
.
decoder_seq_length
if
hasattr
(
self
.
model_tester
,
'decoder_seq_length'
)
else
self
.
model_tester
.
seq_length
encoder_seq_length
=
self
.
model_tester
.
encoder_seq_length
if
hasattr
(
self
.
model_tester
,
'encoder_seq_length'
)
else
self
.
model_tester
.
seq_length
decoder_key_length
=
self
.
model_tester
.
key_length
if
hasattr
(
self
.
model_tester
,
'key_length'
)
else
decoder_seq_length
encoder_key_length
=
self
.
model_tester
.
key_length
if
hasattr
(
self
.
model_tester
,
'key_length'
)
else
encoder_seq_length
for
model_class
in
self
.
all_model_classes
:
config
.
output_attentions
=
True
config
.
output_hidden_states
=
False
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
False
)
...
...
@@ -138,28 +151,42 @@ class CommonTestCases:
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq
_length
])
encoder_
seq_length
,
encoder_key
_length
])
out_len
=
len
(
outputs
)
if
self
.
is_encoder_decoder
:
self
.
assertEqual
(
out_len
%
2
,
0
)
decoder_attentions
=
outputs
[(
out_len
//
2
)
-
1
]
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
False
)
self
.
assertEqual
(
len
(
decoder_attentions
),
self
.
model_tester
.
num_hidden_layers
)
self
.
assertListEqual
(
list
(
decoder_attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
decoder_seq_length
,
decoder_key_length
])
# Check attention is always last and order is fine
config
.
output_attentions
=
True
config
.
output_hidden_states
=
True
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
self
.
assertEqual
(
out_len
+
1
,
len
(
outputs
))
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
self
.
assertEqual
(
out_len
+
(
2
if
self
.
is_encoder_decoder
else
1
),
len
(
outputs
))
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
len
(
attentions
),
self
.
model_tester
.
num_hidden_layers
)
self_
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
len
(
self_
attentions
),
self
.
model_tester
.
num_hidden_layers
)
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
list
(
self_
attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq
_length
])
encoder_
seq_length
,
encoder_key
_length
])
def
test_torchscript
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
@@ -191,21 +218,22 @@ class CommonTestCases:
inputs
=
inputs_dict
[
'input_ids'
]
# Let's keep only input_ids
try
:
torch
.
jit
.
trace
(
model
,
inputs
)
traced_gpt2
=
torch
.
jit
.
trace
(
model
,
inputs
)
except
RuntimeError
:
self
.
fail
(
"Couldn't trace module."
)
try
:
traced_gpt2
=
torch
.
jit
.
trace
(
model
,
inputs
)
torch
.
jit
.
save
(
traced_gpt2
,
"traced_model.pt"
)
except
RuntimeError
:
self
.
fail
(
"Couldn't save module."
)
with
TemporaryDirectory
()
as
tmp_dir_name
:
pt_file_name
=
os
.
path
.
join
(
tmp_dir_name
,
"traced_model.pt"
)
try
:
loaded_model
=
torch
.
jit
.
load
(
"traced_model.pt"
)
os
.
remove
(
"traced_model.pt"
)
except
ValueError
:
self
.
fail
(
"Couldn't load module."
)
try
:
torch
.
jit
.
save
(
traced_gpt2
,
pt_file_name
)
except
Exception
:
self
.
fail
(
"Couldn't save module."
)
try
:
loaded_model
=
torch
.
jit
.
load
(
pt_file_name
)
except
Exception
:
self
.
fail
(
"Couldn't load module."
)
model
.
to
(
torch_device
)
model
.
eval
()
...
...
@@ -223,7 +251,6 @@ class CommonTestCases:
self
.
assertTrue
(
models_equal
)
def
test_headmasking
(
self
):
if
not
self
.
test_head_masking
:
return
...
...
@@ -278,7 +305,6 @@ class CommonTestCases:
self
.
assertNotEqual
(
attentions
[
-
1
][...,
-
1
,
:,
:].
flatten
().
sum
().
item
(),
0.0
)
def
test_head_pruning
(
self
):
if
not
self
.
test_pruning
:
return
...
...
@@ -297,7 +323,8 @@ class CommonTestCases:
heads_to_prune
=
{
0
:
list
(
range
(
1
,
self
.
model_tester
.
num_attention_heads
)),
-
1
:
[
0
]}
model
.
prune_heads
(
heads_to_prune
)
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
...
...
@@ -326,20 +353,19 @@ class CommonTestCases:
heads_to_prune
=
{
0
:
list
(
range
(
1
,
self
.
model_tester
.
num_attention_heads
)),
-
1
:
[
0
]}
model
.
prune_heads
(
heads_to_prune
)
directory
=
"pruned_model"
if
not
os
.
path
.
exists
(
directory
):
os
.
makedirs
(
directory
)
model
.
save_pretrained
(
directory
)
model
=
model_class
.
from_pretrained
(
directory
)
model
.
to
(
torch_device
)
outputs
=
model
(
**
inputs_dict
)
with
TemporaryDirectory
()
as
temp_dir_name
:
model
.
save_pretrained
(
temp_dir_name
)
model
=
model_class
.
from_pretrained
(
temp_dir_name
)
model
.
to
(
torch_device
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
1
)
self
.
assertEqual
(
attentions
[
1
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
self
.
assertEqual
(
attentions
[
-
1
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
shutil
.
rmtree
(
directory
)
def
test_head_pruning_save_load_from_config_init
(
self
):
if
not
self
.
test_pruning
:
...
...
@@ -362,7 +388,8 @@ class CommonTestCases:
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
1
)
...
...
@@ -389,7 +416,8 @@ class CommonTestCases:
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
...
...
@@ -397,16 +425,13 @@ class CommonTestCases:
self
.
assertEqual
(
attentions
[
2
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
self
.
assertEqual
(
attentions
[
3
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
)
directory
=
"pruned_model"
if
not
os
.
path
.
exists
(
directory
):
os
.
makedirs
(
directory
)
model
.
save_pretrained
(
directory
)
model
=
model_class
.
from_pretrained
(
directory
)
model
.
to
(
torch_device
)
shutil
.
rmtree
(
directory
)
with
TemporaryDirectory
()
as
temp_dir_name
:
model
.
save_pretrained
(
temp_dir_name
)
model
=
model_class
.
from_pretrained
(
temp_dir_name
)
model
.
to
(
torch_device
)
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
...
...
@@ -417,7 +442,8 @@ class CommonTestCases:
heads_to_prune
=
{
0
:
[
0
],
2
:
[
1
,
2
]}
model
.
prune_heads
(
heads_to_prune
)
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
attentions
=
outputs
[
-
1
]
self
.
assertEqual
(
attentions
[
0
].
shape
[
-
3
],
self
.
model_tester
.
num_attention_heads
-
1
)
...
...
@@ -427,7 +453,6 @@ class CommonTestCases:
self
.
assertDictEqual
(
model
.
config
.
pruned_heads
,
{
0
:
[
0
],
1
:
[
1
,
2
],
2
:
[
1
,
2
]})
def
test_hidden_states_output
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
@@ -437,14 +462,16 @@ class CommonTestCases:
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
**
inputs_dict
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
hidden_states
=
outputs
[
-
1
]
self
.
assertEqual
(
model
.
config
.
output_attentions
,
False
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
self
.
assertEqual
(
len
(
hidden_states
),
self
.
model_tester
.
num_hidden_layers
+
1
)
self
.
assertListEqual
(
list
(
hidden_states
[
0
].
shape
[
-
2
:]),
[
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
hidden_size
])
[
self
.
model_tester
.
encoder_seq_length
if
hasattr
(
self
.
model_tester
,
'encoder_seq_length'
)
else
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
hidden_size
])
def
test_resize_tokens_embeddings
(
self
):
original_config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
@@ -550,8 +577,14 @@ class CommonTestCases:
def
test_inputs_embeds
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
input_ids
=
inputs_dict
[
"input_ids"
]
del
inputs_dict
[
"input_ids"
]
if
not
self
.
is_encoder_decoder
:
input_ids
=
inputs_dict
[
"input_ids"
]
del
inputs_dict
[
"input_ids"
]
else
:
encoder_input_ids
=
inputs_dict
[
"encoder_input_ids"
]
decoder_input_ids
=
inputs_dict
[
"decoder_input_ids"
]
del
inputs_dict
[
"encoder_input_ids"
]
del
inputs_dict
[
"decoder_input_ids"
]
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
...
...
@@ -559,9 +592,14 @@ class CommonTestCases:
model
.
eval
()
wte
=
model
.
get_input_embeddings
()
inputs_dict
[
"inputs_embeds"
]
=
wte
(
input_ids
)
outputs
=
model
(
**
inputs_dict
)
if
not
self
.
is_encoder_decoder
:
inputs_dict
[
"inputs_embeds"
]
=
wte
(
input_ids
)
else
:
inputs_dict
[
"encoder_inputs_embeds"
]
=
wte
(
encoder_input_ids
)
inputs_dict
[
"decoder_inputs_embeds"
]
=
wte
(
decoder_input_ids
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs_dict
)
class
GPTModelTester
(
CommonModelTester
):
...
...
@@ -633,7 +671,7 @@ class CommonTestCases:
mc_token_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
],
self
.
seq_length
)
config
=
self
.
config_class
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_positions
=
self
.
n_positions
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
...
...
@@ -649,9 +687,10 @@ class CommonTestCases:
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
)
outputs
=
model
(
input_ids
,
position_ids
)
outputs
=
model
(
input_ids
)
with
torch
.
no_grad
():
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
)
outputs
=
model
(
input_ids
,
position_ids
)
outputs
=
model
(
input_ids
)
hidden_state
=
outputs
[
0
]
self
.
parent
.
assertListEqual
(
...
...
@@ -664,7 +703,8 @@ class CommonTestCases:
model
=
self
.
lm_head_model_class
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
,
lm_labels
)
with
torch
.
no_grad
():
outputs
=
model
(
input_ids
,
position_ids
,
token_type_ids
,
lm_labels
)
loss
,
lm_logits
=
outputs
[:
2
]
total_voc
=
self
.
vocab_size
...
...
@@ -681,7 +721,8 @@ class CommonTestCases:
model
=
model_class
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
input_ids
)
with
torch
.
no_grad
():
outputs
=
model
(
input_ids
)
presents
=
outputs
[
-
1
]
self
.
parent
.
assertEqual
(
self
.
num_hidden_layers
,
len
(
presents
))
self
.
parent
.
assertListEqual
(
...
...
@@ -694,7 +735,8 @@ class CommonTestCases:
model
=
self
.
double_head_model_class
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
input_ids
,
mc_token_ids
,
lm_labels
=
lm_labels
,
mc_labels
=
mc_labels
,
with
torch
.
no_grad
():
outputs
=
model
(
input_ids
,
mc_token_ids
,
lm_labels
=
lm_labels
,
mc_labels
=
mc_labels
,
token_type_ids
=
token_type_ids
,
position_ids
=
position_ids
)
lm_loss
,
mc_loss
,
lm_logits
,
mc_logits
=
outputs
[:
4
]
loss
=
[
lm_loss
,
mc_loss
]
...
...
@@ -711,10 +753,8 @@ class CommonTestCases:
[[],
[]])
def
create_and_check_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
self
.
base_model_class
.
pretrained_model_archive_map
.
keys
())[:
1
]:
model
=
self
.
base_model_class
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
self
.
base_model_class
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
parent
.
assertIsNotNone
(
model
)
def
prepare_config_and_inputs_for_common
(
self
):
...
...
transformers/tests/modeling_ctrl_test.py
View file @
1ab25c49
...
...
@@ -16,7 +16,6 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
import
pdb
from
transformers
import
is_torch_available
...
...
@@ -27,7 +26,7 @@ if is_torch_available():
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
...
...
@@ -114,7 +113,7 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
CTRLConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
...
...
@@ -205,10 +204,8 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
CTRLModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
CTRLModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_distilbert_test.py
View file @
1ab25c49
...
...
@@ -27,7 +27,7 @@ if is_torch_available():
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
...
...
@@ -105,7 +105,7 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
DistilBertConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
dim
=
self
.
hidden_size
,
n_layers
=
self
.
num_hidden_layers
,
n_heads
=
self
.
num_attention_heads
,
...
...
@@ -235,10 +235,8 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
# @slow
# def test_model_from_pretrained(self):
# cache_dir = "/tmp/transformers_test/"
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# model = DistilBertModel.from_pretrained(model_name, cache_dir=cache_dir)
# shutil.rmtree(cache_dir)
# model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
# self.assertIsNotNone(model)
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_gpt2_test.py
View file @
1ab25c49
...
...
@@ -17,7 +17,6 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
from
transformers
import
is_torch_available
...
...
@@ -27,7 +26,7 @@ if is_torch_available():
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
...
...
@@ -110,7 +109,7 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
GPT2Config
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
...
...
@@ -239,10 +238,8 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
GPT2Model
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
GPT2Model
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_openai_test.py
View file @
1ab25c49
...
...
@@ -17,7 +17,6 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
from
transformers
import
is_torch_available
...
...
@@ -27,7 +26,7 @@ if is_torch_available():
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
...
...
@@ -98,7 +97,7 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
OpenAIGPTConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
...
...
@@ -207,10 +206,8 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
OpenAIGPTModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
OpenAIGPTModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_roberta_test.py
View file @
1ab25c49
...
...
@@ -17,7 +17,6 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
from
transformers
import
is_torch_available
...
...
@@ -25,11 +24,12 @@ if is_torch_available():
import
torch
from
transformers
import
(
RobertaConfig
,
RobertaModel
,
RobertaForMaskedLM
,
RobertaForSequenceClassification
,
RobertaForTokenClassification
)
from
transformers.modeling_roberta
import
RobertaEmbeddings
from
transformers.modeling_roberta
import
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_torch
,
slow
,
torch_device
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
@
require_torch
...
...
@@ -106,7 +106,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
RobertaConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
hidden_size
=
self
.
hidden_size
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_attention_heads
=
self
.
num_attention_heads
,
...
...
@@ -199,12 +199,61 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
RobertaModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
RobertaModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
def
test_create_position_ids_respects_padding_index
(
self
):
""" Ensure that the default position ids only assign a sequential . This is a regression
test for https://github.com/huggingface/transformers/issues/1761
The position ids should be masked with the embedding object's padding index. Therefore, the
first available non-padding position index is RobertaEmbeddings.padding_idx + 1
"""
config
=
self
.
model_tester
.
prepare_config_and_inputs
()[
0
]
model
=
RobertaEmbeddings
(
config
=
config
)
input_ids
=
torch
.
as_tensor
([[
12
,
31
,
13
,
model
.
padding_idx
]])
expected_positions
=
torch
.
as_tensor
([[
0
+
model
.
padding_idx
+
1
,
1
+
model
.
padding_idx
+
1
,
2
+
model
.
padding_idx
+
1
,
model
.
padding_idx
]])
position_ids
=
model
.
create_position_ids_from_input_ids
(
input_ids
)
self
.
assertEqual
(
position_ids
.
shape
,
expected_positions
.
shape
)
self
.
assertTrue
(
torch
.
all
(
torch
.
eq
(
position_ids
,
expected_positions
)))
def
test_create_position_ids_from_inputs_embeds
(
self
):
""" Ensure that the default position ids only assign a sequential . This is a regression
test for https://github.com/huggingface/transformers/issues/1761
The position ids should be masked with the embedding object's padding index. Therefore, the
first available non-padding position index is RobertaEmbeddings.padding_idx + 1
"""
config
=
self
.
model_tester
.
prepare_config_and_inputs
()[
0
]
embeddings
=
RobertaEmbeddings
(
config
=
config
)
inputs_embeds
=
torch
.
Tensor
(
2
,
4
,
30
)
expected_single_positions
=
[
0
+
embeddings
.
padding_idx
+
1
,
1
+
embeddings
.
padding_idx
+
1
,
2
+
embeddings
.
padding_idx
+
1
,
3
+
embeddings
.
padding_idx
+
1
,
]
expected_positions
=
torch
.
as_tensor
([
expected_single_positions
,
expected_single_positions
])
position_ids
=
embeddings
.
create_position_ids_from_inputs_embeds
(
inputs_embeds
)
self
.
assertEqual
(
position_ids
.
shape
,
expected_positions
.
shape
)
self
.
assertTrue
(
torch
.
all
(
torch
.
eq
(
position_ids
,
expected_positions
))
)
class
RobertaModelIntegrationTest
(
unittest
.
TestCase
):
...
...
transformers/tests/modeling_t5_test.py
0 → 100644
View file @
1ab25c49
# coding=utf-8
# Copyright 2018 Google T5 Authors and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
unittest
from
transformers
import
is_torch_available
from
.modeling_common_test
import
(
CommonTestCases
,
ids_tensor
,
floats_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
CACHE_DIR
,
require_torch
,
slow
,
torch_device
if
is_torch_available
():
from
transformers
import
(
T5Config
,
T5Model
,
T5WithLMHeadModel
)
from
transformers.modeling_t5
import
T5_PRETRAINED_MODEL_ARCHIVE_MAP
@
require_torch
class
T5ModelTest
(
CommonTestCases
.
CommonModelTester
):
all_model_classes
=
(
T5Model
,
T5WithLMHeadModel
)
if
is_torch_available
()
else
()
test_pruning
=
False
test_torchscript
=
False
test_resize_embeddings
=
False
is_encoder_decoder
=
True
class
T5ModelTester
(
object
):
def
__init__
(
self
,
parent
,
batch_size
=
13
,
encoder_seq_length
=
7
,
decoder_seq_length
=
9
,
is_training
=
True
,
use_attention_mask
=
True
,
use_labels
=
True
,
vocab_size
=
99
,
n_positions
=
14
,
hidden_size
=
32
,
num_hidden_layers
=
5
,
num_attention_heads
=
4
,
d_ff
=
37
,
relative_attention_num_buckets
=
8
,
dropout_rate
=
0.1
,
initializer_factor
=
0.002
,
scope
=
None
,
):
self
.
parent
=
parent
self
.
batch_size
=
batch_size
self
.
encoder_seq_length
=
encoder_seq_length
self
.
decoder_seq_length
=
decoder_seq_length
self
.
is_training
=
is_training
self
.
use_attention_mask
=
use_attention_mask
self
.
use_labels
=
use_labels
self
.
vocab_size
=
vocab_size
self
.
n_positions
=
n_positions
self
.
hidden_size
=
hidden_size
self
.
num_hidden_layers
=
num_hidden_layers
self
.
num_attention_heads
=
num_attention_heads
self
.
d_ff
=
d_ff
self
.
relative_attention_num_buckets
=
relative_attention_num_buckets
self
.
dropout_rate
=
dropout_rate
self
.
initializer_factor
=
initializer_factor
self
.
scope
=
scope
def
prepare_config_and_inputs
(
self
):
encoder_input_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
encoder_seq_length
],
self
.
vocab_size
)
decoder_input_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
decoder_seq_length
],
self
.
vocab_size
)
encoder_attention_mask
=
None
decoder_attention_mask
=
None
if
self
.
use_attention_mask
:
encoder_attention_mask
=
ids_tensor
([
self
.
batch_size
,
self
.
encoder_seq_length
],
vocab_size
=
2
)
decoder_attention_mask
=
ids_tensor
([
self
.
batch_size
,
self
.
decoder_seq_length
],
vocab_size
=
2
)
decoder_lm_labels
=
None
if
self
.
use_labels
:
decoder_lm_labels
=
ids_tensor
([
self
.
batch_size
,
self
.
decoder_seq_length
],
self
.
vocab_size
)
config
=
T5Config
(
vocab_size
=
self
.
vocab_size
,
n_positions
=
self
.
n_positions
,
d_model
=
self
.
hidden_size
,
d_ff
=
self
.
d_ff
,
d_kv
=
self
.
hidden_size
//
self
.
num_attention_heads
,
num_layers
=
self
.
num_hidden_layers
,
num_heads
=
self
.
num_attention_heads
,
relative_attention_num_buckets
=
self
.
relative_attention_num_buckets
,
dropout_rate
=
self
.
dropout_rate
,
initializer_factor
=
self
.
initializer_factor
)
return
(
config
,
encoder_input_ids
,
decoder_input_ids
,
encoder_attention_mask
,
decoder_attention_mask
,
decoder_lm_labels
)
def
check_loss_output
(
self
,
result
):
self
.
parent
.
assertListEqual
(
list
(
result
[
"loss"
].
size
()),
[])
def
create_and_check_t5_model
(
self
,
config
,
encoder_input_ids
,
decoder_input_ids
,
encoder_attention_mask
,
decoder_attention_mask
,
decoder_lm_labels
):
model
=
T5Model
(
config
=
config
)
model
.
eval
()
decoder_output
,
encoder_output
=
model
(
encoder_input_ids
=
encoder_input_ids
,
decoder_input_ids
=
decoder_input_ids
,
encoder_attention_mask
=
encoder_attention_mask
,
decoder_attention_mask
=
decoder_attention_mask
)
decoder_output
,
encoder_output
=
model
(
encoder_input_ids
=
encoder_input_ids
,
decoder_input_ids
=
decoder_input_ids
)
result
=
{
"encoder_output"
:
encoder_output
,
"decoder_output"
:
decoder_output
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"encoder_output"
].
size
()),
[
self
.
batch_size
,
self
.
encoder_seq_length
,
self
.
hidden_size
])
self
.
parent
.
assertListEqual
(
list
(
result
[
"decoder_output"
].
size
()),
[
self
.
batch_size
,
self
.
decoder_seq_length
,
self
.
hidden_size
])
def
create_and_check_t5_with_lm_head
(
self
,
config
,
encoder_input_ids
,
decoder_input_ids
,
encoder_attention_mask
,
decoder_attention_mask
,
decoder_lm_labels
):
model
=
T5WithLMHeadModel
(
config
=
config
)
model
.
eval
()
outputs
=
model
(
encoder_input_ids
=
encoder_input_ids
,
decoder_input_ids
=
decoder_input_ids
,
decoder_attention_mask
=
decoder_attention_mask
,
decoder_lm_labels
=
decoder_lm_labels
)
loss
,
prediction_scores
=
outputs
[
0
],
outputs
[
1
]
result
=
{
"loss"
:
loss
,
"prediction_scores"
:
prediction_scores
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"prediction_scores"
].
size
()),
[
self
.
batch_size
,
self
.
decoder_seq_length
,
self
.
vocab_size
])
self
.
check_loss_output
(
result
)
def
prepare_config_and_inputs_for_common
(
self
):
config_and_inputs
=
self
.
prepare_config_and_inputs
()
(
config
,
encoder_input_ids
,
decoder_input_ids
,
encoder_attention_mask
,
decoder_attention_mask
,
decoder_lm_labels
)
=
config_and_inputs
inputs_dict
=
{
'encoder_input_ids'
:
encoder_input_ids
,
'decoder_input_ids'
:
decoder_input_ids
,
'decoder_attention_mask'
:
decoder_attention_mask
,
'encoder_attention_mask'
:
encoder_attention_mask
}
return
config
,
inputs_dict
def
setUp
(
self
):
self
.
model_tester
=
T5ModelTest
.
T5ModelTester
(
self
)
self
.
config_tester
=
ConfigTester
(
self
,
config_class
=
T5Config
,
d_model
=
37
)
def
test_config
(
self
):
self
.
config_tester
.
run_common_tests
()
def
test_t5_model
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_t5_model
(
*
config_and_inputs
)
def
test_with_lm_head
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_t5_with_lm_head
(
*
config_and_inputs
)
@
slow
def
test_model_from_pretrained
(
self
):
for
model_name
in
list
(
T5_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
T5Model
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
unittest
.
main
()
transformers/tests/modeling_tf_albert_test.py
View file @
1ab25c49
...
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
AlbertConfig
,
is_tf_available
...
...
@@ -118,7 +117,7 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
AlbertConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
hidden_size
=
self
.
hidden_size
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_attention_heads
=
self
.
num_attention_heads
,
...
...
@@ -217,12 +216,8 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
# for model_name in list(TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'albert-base-uncased'
]:
model
=
TFAlbertModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
for
model_name
in
list
(
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFAlbertModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_tf_auto_test.py
View file @
1ab25c49
...
...
@@ -22,7 +22,7 @@ import logging
from
transformers
import
is_tf_available
from
.utils
import
require_tf
,
slow
from
.utils
import
require_tf
,
slow
,
SMALL_MODEL_IDENTIFIER
if
is_tf_available
():
from
transformers
import
(
AutoConfig
,
BertConfig
,
...
...
@@ -46,11 +46,11 @@ class TFAutoModelTest(unittest.TestCase):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
config
=
AutoConfig
.
from_pretrained
(
model_name
,
force_download
=
True
)
config
=
AutoConfig
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsInstance
(
config
,
BertConfig
)
model
=
TFAutoModel
.
from_pretrained
(
model_name
,
force_download
=
True
)
model
=
TFAutoModel
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsInstance
(
model
,
TFBertModel
)
...
...
@@ -59,11 +59,11 @@ class TFAutoModelTest(unittest.TestCase):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
config
=
AutoConfig
.
from_pretrained
(
model_name
,
force_download
=
True
)
config
=
AutoConfig
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsInstance
(
config
,
BertConfig
)
model
=
TFAutoModelWithLMHead
.
from_pretrained
(
model_name
,
force_download
=
True
)
model
=
TFAutoModelWithLMHead
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsInstance
(
model
,
TFBertForMaskedLM
)
...
...
@@ -72,11 +72,11 @@ class TFAutoModelTest(unittest.TestCase):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
config
=
AutoConfig
.
from_pretrained
(
model_name
,
force_download
=
True
)
config
=
AutoConfig
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsInstance
(
config
,
BertConfig
)
model
=
TFAutoModelForSequenceClassification
.
from_pretrained
(
model_name
,
force_download
=
True
)
model
=
TFAutoModelForSequenceClassification
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsInstance
(
model
,
TFBertForSequenceClassification
)
...
...
@@ -85,14 +85,19 @@ class TFAutoModelTest(unittest.TestCase):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
config
=
AutoConfig
.
from_pretrained
(
model_name
,
force_download
=
True
)
config
=
AutoConfig
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
config
)
self
.
assertIsInstance
(
config
,
BertConfig
)
model
=
TFAutoModelForQuestionAnswering
.
from_pretrained
(
model_name
,
force_download
=
True
)
model
=
TFAutoModelForQuestionAnswering
.
from_pretrained
(
model_name
)
self
.
assertIsNotNone
(
model
)
self
.
assertIsInstance
(
model
,
TFBertForQuestionAnswering
)
def
test_from_pretrained_identifier
(
self
):
logging
.
basicConfig
(
level
=
logging
.
INFO
)
model
=
TFAutoModelWithLMHead
.
from_pretrained
(
SMALL_MODEL_IDENTIFIER
)
self
.
assertIsInstance
(
model
,
TFBertForMaskedLM
)
if
__name__
==
"__main__"
:
unittest
.
main
()
transformers/tests/modeling_tf_bert_test.py
View file @
1ab25c49
...
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
BertConfig
,
is_tf_available
...
...
@@ -114,7 +113,7 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
BertConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
hidden_size
=
self
.
hidden_size
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_attention_heads
=
self
.
num_attention_heads
,
...
...
@@ -310,11 +309,9 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for
model_name
in
[
'bert-base-uncased'
]:
model
=
TFBertModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
TFBertModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_common_test.py
View file @
1ab25c49
...
...
@@ -69,6 +69,7 @@ class TFCommonTestCases:
test_torchscript
=
True
test_pruning
=
True
test_resize_embeddings
=
True
is_encoder_decoder
=
False
def
test_initialization
(
self
):
pass
...
...
@@ -129,8 +130,12 @@ class TFCommonTestCases:
for
name
,
key
in
inputs_dict
.
items
())
with
torch
.
no_grad
():
pto
=
pt_model
(
**
pt_inputs_dict
)
tfo
=
tf_model
(
inputs_dict
)
max_diff
=
np
.
amax
(
np
.
abs
(
tfo
[
0
].
numpy
()
-
pto
[
0
].
numpy
()))
tfo
=
tf_model
(
inputs_dict
,
training
=
False
)
tf_hidden_states
=
tfo
[
0
].
numpy
()
pt_hidden_states
=
pto
[
0
].
numpy
()
tf_hidden_states
[
np
.
isnan
(
tf_hidden_states
)]
=
0
pt_hidden_states
[
np
.
isnan
(
pt_hidden_states
)]
=
0
max_diff
=
np
.
amax
(
np
.
abs
(
tf_hidden_states
-
pt_hidden_states
))
self
.
assertLessEqual
(
max_diff
,
2e-2
)
# Check we can load pt model in tf and vice-versa with checkpoint => model functions
...
...
@@ -150,13 +155,21 @@ class TFCommonTestCases:
with
torch
.
no_grad
():
pto
=
pt_model
(
**
pt_inputs_dict
)
tfo
=
tf_model
(
inputs_dict
)
max_diff
=
np
.
amax
(
np
.
abs
(
tfo
[
0
].
numpy
()
-
pto
[
0
].
numpy
()))
tfo
=
tfo
[
0
].
numpy
()
pto
=
pto
[
0
].
numpy
()
tfo
[
np
.
isnan
(
tfo
)]
=
0
pto
[
np
.
isnan
(
pto
)]
=
0
max_diff
=
np
.
amax
(
np
.
abs
(
tfo
-
pto
))
self
.
assertLessEqual
(
max_diff
,
2e-2
)
def
test_compile_tf_model
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
input_ids
=
tf
.
keras
.
Input
(
batch_shape
=
(
2
,
2000
),
name
=
'input_ids'
,
dtype
=
'int32'
)
if
self
.
is_encoder_decoder
:
input_ids
=
{
'decoder_input_ids'
:
tf
.
keras
.
Input
(
batch_shape
=
(
2
,
2000
),
name
=
'decoder_input_ids'
,
dtype
=
'int32'
),
'encoder_input_ids'
:
tf
.
keras
.
Input
(
batch_shape
=
(
2
,
2000
),
name
=
'encoder_input_ids'
,
dtype
=
'int32'
)}
else
:
input_ids
=
tf
.
keras
.
Input
(
batch_shape
=
(
2
,
2000
),
name
=
'input_ids'
,
dtype
=
'int32'
)
optimizer
=
tf
.
keras
.
optimizers
.
Adam
(
learning_rate
=
3e-5
,
epsilon
=
1e-08
,
clipnorm
=
1.0
)
loss
=
tf
.
keras
.
losses
.
SparseCategoricalCrossentropy
(
from_logits
=
True
)
metric
=
tf
.
keras
.
metrics
.
SparseCategoricalAccuracy
(
'accuracy'
)
...
...
@@ -189,7 +202,7 @@ class TFCommonTestCases:
outputs_dict
=
model
(
inputs_dict
)
inputs_keywords
=
copy
.
deepcopy
(
inputs_dict
)
input_ids
=
inputs_keywords
.
pop
(
'input_ids'
)
input_ids
=
inputs_keywords
.
pop
(
'input_ids'
if
not
self
.
is_encoder_decoder
else
'decoder_input_ids'
,
None
)
outputs_keywords
=
model
(
input_ids
,
**
inputs_keywords
)
output_dict
=
outputs_dict
[
0
].
numpy
()
...
...
@@ -200,6 +213,11 @@ class TFCommonTestCases:
def
test_attention_outputs
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
decoder_seq_length
=
self
.
model_tester
.
decoder_seq_length
if
hasattr
(
self
.
model_tester
,
'decoder_seq_length'
)
else
self
.
model_tester
.
seq_length
encoder_seq_length
=
self
.
model_tester
.
encoder_seq_length
if
hasattr
(
self
.
model_tester
,
'encoder_seq_length'
)
else
self
.
model_tester
.
seq_length
decoder_key_length
=
self
.
model_tester
.
key_length
if
hasattr
(
self
.
model_tester
,
'key_length'
)
else
decoder_seq_length
encoder_key_length
=
self
.
model_tester
.
key_length
if
hasattr
(
self
.
model_tester
,
'key_length'
)
else
encoder_seq_length
for
model_class
in
self
.
all_model_classes
:
config
.
output_attentions
=
True
config
.
output_hidden_states
=
False
...
...
@@ -212,16 +230,28 @@ class TFCommonTestCases:
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq
_length
])
encoder_
seq_length
,
encoder_key
_length
])
out_len
=
len
(
outputs
)
if
self
.
is_encoder_decoder
:
self
.
assertEqual
(
out_len
%
2
,
0
)
decoder_attentions
=
outputs
[(
out_len
//
2
)
-
1
]
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
False
)
self
.
assertEqual
(
len
(
decoder_attentions
),
self
.
model_tester
.
num_hidden_layers
)
self
.
assertListEqual
(
list
(
decoder_attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
decoder_seq_length
,
decoder_key_length
])
# Check attention is always last and order is fine
config
.
output_attentions
=
True
config
.
output_hidden_states
=
True
model
=
model_class
(
config
)
outputs
=
model
(
inputs_dict
)
self
.
assertEqual
(
out_len
+
1
,
len
(
outputs
))
self
.
assertEqual
(
out_len
+
(
2
if
self
.
is_encoder_decoder
else
1
)
,
len
(
outputs
))
self
.
assertEqual
(
model
.
config
.
output_attentions
,
True
)
self
.
assertEqual
(
model
.
config
.
output_hidden_states
,
True
)
...
...
@@ -230,8 +260,8 @@ class TFCommonTestCases:
self
.
assertListEqual
(
list
(
attentions
[
0
].
shape
[
-
3
:]),
[
self
.
model_tester
.
num_attention_heads
,
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
key_len
if
hasattr
(
self
.
model_tester
,
'key_len'
)
else
self
.
model_tester
.
seq
_length
])
encoder_
seq_length
,
encoder_key
_length
])
def
test_hidden_states_output
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
@@ -264,35 +294,53 @@ class TFCommonTestCases:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
first
,
second
=
model
(
inputs_dict
,
training
=
False
)[
0
],
model
(
inputs_dict
,
training
=
False
)[
0
]
self
.
assertTrue
(
tf
.
math
.
equal
(
first
,
second
).
numpy
().
all
())
out_1
=
first
.
numpy
()
out_2
=
second
.
numpy
()
out_1
=
out_1
[
~
np
.
isnan
(
out_1
)]
out_2
=
out_2
[
~
np
.
isnan
(
out_2
)]
max_diff
=
np
.
amax
(
np
.
abs
(
out_1
-
out_2
))
self
.
assertLessEqual
(
max_diff
,
1e-5
)
def
_get_embeds
(
self
,
wte
,
input_ids
):
# ^^ In our TF models, the input_embeddings can take slightly different forms,
# so we try a few of them.
# We used to fall back to just synthetically creating a dummy tensor of ones:
try
:
x
=
wte
(
input_ids
,
mode
=
"embedding"
)
except
:
try
:
x
=
wte
([
input_ids
],
mode
=
"embedding"
)
except
:
try
:
x
=
wte
([
input_ids
,
None
,
None
,
None
],
mode
=
"embedding"
)
except
:
if
hasattr
(
self
.
model_tester
,
"embedding_size"
):
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
embedding_size
],
dtype
=
tf
.
dtypes
.
float32
)
else
:
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
hidden_size
],
dtype
=
tf
.
dtypes
.
float32
)
return
x
def
test_inputs_embeds
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
input_ids
=
inputs_dict
[
"input_ids"
]
del
inputs_dict
[
"input_ids"
]
if
not
self
.
is_encoder_decoder
:
input_ids
=
inputs_dict
[
"input_ids"
]
del
inputs_dict
[
"input_ids"
]
else
:
encoder_input_ids
=
inputs_dict
[
"encoder_input_ids"
]
decoder_input_ids
=
inputs_dict
[
"decoder_input_ids"
]
del
inputs_dict
[
"encoder_input_ids"
]
del
inputs_dict
[
"decoder_input_ids"
]
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
wte
=
model
.
get_input_embeddings
()
try
:
x
=
wte
(
input_ids
,
mode
=
"embedding"
)
except
:
try
:
x
=
wte
([
input_ids
],
mode
=
"embedding"
)
except
:
try
:
x
=
wte
([
input_ids
,
None
,
None
,
None
],
mode
=
"embedding"
)
except
:
if
hasattr
(
self
.
model_tester
,
"embedding_size"
):
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
embedding_size
],
dtype
=
tf
.
dtypes
.
float32
)
else
:
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
hidden_size
],
dtype
=
tf
.
dtypes
.
float32
)
# ^^ In our TF models, the input_embeddings can take slightly different forms,
# so we try a few of them.
# We used to fall back to just synthetically creating a dummy tensor of ones:
#
inputs_dict
[
"inputs_embeds"
]
=
x
if
not
self
.
is_encoder_decoder
:
inputs_dict
[
"inputs_embeds"
]
=
self
.
_get_embeds
(
wte
,
input_ids
)
else
:
inputs_dict
[
"encoder_inputs_embeds"
]
=
self
.
_get_embeds
(
wte
,
encoder_input_ids
)
inputs_dict
[
"decoder_inputs_embeds"
]
=
self
.
_get_embeds
(
wte
,
decoder_input_ids
)
outputs
=
model
(
inputs_dict
)
...
...
transformers/tests/modeling_tf_ctrl_test.py
View file @
1ab25c49
...
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
CTRLConfig
,
is_tf_available
...
...
@@ -112,7 +111,7 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
CTRLConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
...
...
@@ -189,10 +188,8 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFCTRLModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
TFCTRLModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_distilbert_test.py
View file @
1ab25c49
...
...
@@ -20,7 +20,7 @@ import unittest
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
DistilBertConfig
,
is_tf_available
...
...
@@ -107,7 +107,7 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
DistilBertConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
dim
=
self
.
hidden_size
,
n_layers
=
self
.
num_hidden_layers
,
n_heads
=
self
.
num_attention_heads
,
...
...
@@ -211,10 +211,8 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
# @slow
# def test_model_from_pretrained(self):
# cache_dir = "/tmp/transformers_test/"
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# model = DistilBertModel.from_pretrained(model_name, cache_dir=cache_dir)
# shutil.rmtree(cache_dir)
# model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
# self.assertIsNotNone(model)
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_gpt2_test.py
View file @
1ab25c49
...
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
GPT2Config
,
is_tf_available
...
...
@@ -115,7 +114,7 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
GPT2Config
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
...
...
@@ -220,10 +219,8 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFGPT2Model
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
TFGPT2Model
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_openai_gpt_test.py
View file @
1ab25c49
...
...
@@ -17,12 +17,11 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
OpenAIGPTConfig
,
is_tf_available
...
...
@@ -114,7 +113,7 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
OpenAIGPTConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_embd
=
self
.
hidden_size
,
n_layer
=
self
.
num_hidden_layers
,
n_head
=
self
.
num_attention_heads
,
...
...
@@ -219,10 +218,8 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFOpenAIGPTModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
TFOpenAIGPTModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
...
...
transformers/tests/modeling_tf_roberta_test.py
View file @
1ab25c49
...
...
@@ -17,11 +17,10 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
RobertaConfig
,
is_tf_available
...
...
@@ -109,7 +108,7 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
num_choices
)
config
=
RobertaConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
hidden_size
=
self
.
hidden_size
,
num_hidden_layers
=
self
.
num_hidden_layers
,
num_attention_heads
=
self
.
num_attention_heads
,
...
...
@@ -192,10 +191,8 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFRobertaModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
TFRobertaModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_tf_t5_test.py
0 → 100644
View file @
1ab25c49
# coding=utf-8
# Copyright 2018 Google T5 Authors and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
unittest
import
sys
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
T5Config
,
is_tf_available
if
is_tf_available
():
import
tensorflow
as
tf
from
transformers.modeling_tf_t5
import
(
TFT5Model
,
TFT5WithLMHeadModel
,
TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP
)
@
require_tf
class
TFT5ModelTest
(
TFCommonTestCases
.
TFCommonModelTester
):
is_encoder_decoder
=
True
all_model_classes
=
(
TFT5Model
,
TFT5WithLMHeadModel
)
if
is_tf_available
()
else
()
class
TFT5ModelTester
(
object
):
def
__init__
(
self
,
parent
,
batch_size
=
13
,
seq_length
=
7
,
is_training
=
True
,
use_input_mask
=
True
,
use_labels
=
True
,
vocab_size
=
99
,
n_positions
=
14
,
hidden_size
=
32
,
num_hidden_layers
=
5
,
num_attention_heads
=
4
,
d_ff
=
37
,
relative_attention_num_buckets
=
8
,
dropout_rate
=
0.1
,
initializer_factor
=
0.002
,
scope
=
None
,
):
self
.
parent
=
parent
self
.
batch_size
=
batch_size
self
.
seq_length
=
seq_length
self
.
is_training
=
is_training
self
.
use_input_mask
=
use_input_mask
self
.
use_labels
=
use_labels
self
.
vocab_size
=
vocab_size
self
.
n_positions
=
n_positions
self
.
hidden_size
=
hidden_size
self
.
num_hidden_layers
=
num_hidden_layers
self
.
num_attention_heads
=
num_attention_heads
self
.
d_ff
=
d_ff
self
.
relative_attention_num_buckets
=
relative_attention_num_buckets
self
.
dropout_rate
=
dropout_rate
self
.
initializer_factor
=
initializer_factor
self
.
scope
=
scope
def
prepare_config_and_inputs
(
self
):
input_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
vocab_size
)
input_mask
=
None
if
self
.
use_input_mask
:
input_mask
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
vocab_size
=
2
)
token_labels
=
None
if
self
.
use_labels
:
token_labels
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
vocab_size
)
config
=
T5Config
(
vocab_size
=
self
.
vocab_size
,
n_positions
=
self
.
n_positions
,
d_model
=
self
.
hidden_size
,
d_ff
=
self
.
d_ff
,
d_kv
=
self
.
hidden_size
//
self
.
num_attention_heads
,
num_layers
=
self
.
num_hidden_layers
,
num_heads
=
self
.
num_attention_heads
,
relative_attention_num_buckets
=
self
.
relative_attention_num_buckets
,
dropout_rate
=
self
.
dropout_rate
,
initializer_factor
=
self
.
initializer_factor
)
return
(
config
,
input_ids
,
input_mask
,
token_labels
)
def
create_and_check_t5_model
(
self
,
config
,
input_ids
,
input_mask
,
token_labels
):
model
=
TFT5Model
(
config
=
config
)
inputs
=
{
'encoder_input_ids'
:
input_ids
,
'decoder_input_ids'
:
input_ids
,
'decoder_attention_mask'
:
input_mask
}
encoder_output
,
decoder_output
=
model
(
inputs
)
encoder_output
,
decoder_output
=
model
(
input_ids
,
decoder_attention_mask
=
input_mask
,
encoder_input_ids
=
input_ids
)
result
=
{
"encoder_output"
:
encoder_output
.
numpy
(),
"decoder_output"
:
decoder_output
.
numpy
(),
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"encoder_output"
].
shape
),
[
self
.
batch_size
,
self
.
seq_length
,
self
.
hidden_size
])
self
.
parent
.
assertListEqual
(
list
(
result
[
"decoder_output"
].
shape
),
[
self
.
batch_size
,
self
.
seq_length
,
self
.
hidden_size
])
def
create_and_check_t5_with_lm_head
(
self
,
config
,
input_ids
,
input_mask
,
token_labels
):
model
=
TFT5WithLMHeadModel
(
config
=
config
)
inputs
=
{
'encoder_input_ids'
:
input_ids
,
'decoder_input_ids'
:
input_ids
,
'decoder_attention_mask'
:
input_mask
}
prediction_scores
,
decoder_output
=
model
(
inputs
)
result
=
{
"prediction_scores"
:
prediction_scores
.
numpy
(),
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"prediction_scores"
].
shape
),
[
self
.
batch_size
,
self
.
seq_length
,
self
.
vocab_size
])
def
prepare_config_and_inputs_for_common
(
self
):
config_and_inputs
=
self
.
prepare_config_and_inputs
()
(
config
,
input_ids
,
input_mask
,
token_labels
)
=
config_and_inputs
inputs_dict
=
{
'encoder_input_ids'
:
input_ids
,
'decoder_input_ids'
:
input_ids
,
'decoder_attention_mask'
:
input_mask
}
return
config
,
inputs_dict
def
setUp
(
self
):
self
.
model_tester
=
TFT5ModelTest
.
TFT5ModelTester
(
self
)
self
.
config_tester
=
ConfigTester
(
self
,
config_class
=
T5Config
,
d_model
=
37
)
def
test_config
(
self
):
self
.
config_tester
.
run_common_tests
()
def
test_t5_model
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_t5_model
(
*
config_and_inputs
)
def
test_with_lm_head
(
self
):
config_and_inputs
=
self
.
model_tester
.
prepare_config_and_inputs
()
self
.
model_tester
.
create_and_check_t5_with_lm_head
(
*
config_and_inputs
)
@
slow
def
test_model_from_pretrained
(
self
):
for
model_name
in
[
't5-small'
]:
model
=
TFT5Model
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
if
__name__
==
"__main__"
:
unittest
.
main
()
transformers/tests/modeling_tf_transfo_xl_test.py
View file @
1ab25c49
...
...
@@ -18,11 +18,10 @@ from __future__ import print_function
import
unittest
import
random
import
shutil
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
from
transformers
import
TransfoXLConfig
,
is_tf_available
...
...
@@ -67,7 +66,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
self
.
batch_size
=
batch_size
self
.
seq_length
=
seq_length
self
.
mem_len
=
mem_len
self
.
key_len
=
seq_length
+
mem_len
self
.
key_len
gth
=
seq_length
+
mem_len
self
.
clamp_len
=
clamp_len
self
.
is_training
=
is_training
self
.
use_labels
=
use_labels
...
...
@@ -92,7 +91,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
lm_labels
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
vocab_size
)
config
=
TransfoXLConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
mem_len
=
self
.
mem_len
,
clamp_len
=
self
.
clamp_len
,
cutoffs
=
self
.
cutoffs
,
...
...
@@ -205,10 +204,8 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFTransfoXLModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
TFTransfoXLModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_tf_xlm_test.py
View file @
1ab25c49
...
...
@@ -17,7 +17,6 @@ from __future__ import division
from
__future__
import
print_function
import
unittest
import
shutil
from
transformers
import
is_tf_available
...
...
@@ -31,7 +30,7 @@ if is_tf_available():
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
@
require_tf
...
...
@@ -125,7 +124,7 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
is_impossible_labels
=
ids_tensor
([
self
.
batch_size
],
2
,
dtype
=
tf
.
float32
)
config
=
XLMConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
n_special
=
self
.
n_special
,
emb_dim
=
self
.
hidden_size
,
n_layers
=
self
.
num_hidden_layers
,
...
...
@@ -252,10 +251,8 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
XLMModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
TFXLMModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
...
...
transformers/tests/modeling_tf_xlnet_test.py
View file @
1ab25c49
...
...
@@ -20,7 +20,6 @@ import os
import
unittest
import
json
import
random
import
shutil
from
transformers
import
XLNetConfig
,
is_tf_available
...
...
@@ -35,7 +34,7 @@ if is_tf_available():
from
.modeling_tf_common_test
import
(
TFCommonTestCases
,
ids_tensor
)
from
.configuration_common_test
import
ConfigTester
from
.utils
import
require_tf
,
slow
from
.utils
import
CACHE_DIR
,
require_tf
,
slow
@
require_tf
...
...
@@ -64,7 +63,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
num_attention_heads
=
4
,
d_inner
=
128
,
num_hidden_layers
=
5
,
max_position_embeddings
=
10
,
type_sequence_label_size
=
2
,
untie_r
=
True
,
bi_data
=
False
,
...
...
@@ -88,7 +86,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
self
.
num_attention_heads
=
num_attention_heads
self
.
d_inner
=
d_inner
self
.
num_hidden_layers
=
num_hidden_layers
self
.
max_position_embeddings
=
max_position_embeddings
self
.
bi_data
=
bi_data
self
.
untie_r
=
untie_r
self
.
same_length
=
same_length
...
...
@@ -122,13 +119,12 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
is_impossible_labels
=
ids_tensor
([
self
.
batch_size
],
2
,
dtype
=
tf
.
float32
)
config
=
XLNetConfig
(
vocab_size
_or_config_json_file
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
d_model
=
self
.
hidden_size
,
n_head
=
self
.
num_attention_heads
,
d_inner
=
self
.
d_inner
,
n_layer
=
self
.
num_hidden_layers
,
untie_r
=
self
.
untie_r
,
max_position_embeddings
=
self
.
max_position_embeddings
,
mem_len
=
self
.
mem_len
,
clamp_len
=
self
.
clamp_len
,
same_length
=
self
.
same_length
,
...
...
@@ -322,10 +318,8 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
@
slow
def
test_model_from_pretrained
(
self
):
cache_dir
=
"/tmp/transformers_test/"
for
model_name
in
list
(
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
.
keys
())[:
1
]:
model
=
TFXLNetModel
.
from_pretrained
(
model_name
,
cache_dir
=
cache_dir
)
shutil
.
rmtree
(
cache_dir
)
model
=
TFXLNetModel
.
from_pretrained
(
model_name
,
cache_dir
=
CACHE_DIR
)
self
.
assertIsNotNone
(
model
)
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment