chenpangpang / transformers · Commits · 5daca95d

Unverified commit 5daca95d, authored Dec 22, 2019 by Thomas Wolf, committed via GitHub on Dec 22, 2019.

Merge pull request #2268 from aaugustin/improve-repository-structure

Improve repository structure

Parents: 54abc67a, 00204f2b
Changes: 167 files in the full commit. This page shows 20 changed files with 1124 additions and 120 deletions (+1124, -120).
tests/test_modeling_bert.py              +3    -7
tests/test_modeling_common.py            +704  -0
tests/test_modeling_ctrl.py              +3    -7
tests/test_modeling_distilbert.py        +3    -7
tests/test_modeling_encoder_decoder.py   +0    -4
tests/test_modeling_gpt2.py              +3    -7
tests/test_modeling_openai.py            +3    -7
tests/test_modeling_roberta.py           +3    -7
tests/test_modeling_t5.py                +3    -7
tests/test_modeling_tf_albert.py         +3    -7
tests/test_modeling_tf_auto.py           +0    -4
tests/test_modeling_tf_bert.py           +3    -7
tests/test_modeling_tf_common.py         +372  -0
tests/test_modeling_tf_ctrl.py           +3    -7
tests/test_modeling_tf_distilbert.py     +3    -7
tests/test_modeling_tf_gpt2.py           +3    -7
tests/test_modeling_tf_openai_gpt.py     +3    -7
tests/test_modeling_tf_roberta.py        +3    -7
tests/test_modeling_tf_t5.py             +3    -7
tests/test_modeling_tf_transfo_xl.py     +3    -7
transformers/tests/modeling_bert_test.py → tests/test_modeling_bert.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import is_torch_available

-from .configuration_common_test import ConfigTester
-from .modeling_common_test import CommonTestCases, floats_tensor, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
 from .utils import CACHE_DIR, require_torch, slow, torch_device

@@ -39,7 +39,7 @@ if is_torch_available():
 @require_torch
-class BertModelTest(CommonTestCases.CommonModelTester):
+class BertModelTest(ModelTesterMixin, unittest.TestCase):

     all_model_classes = (
         (
@@ -475,7 +475,3 @@ class BertModelTest(CommonTestCases.CommonModelTester):
         for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = BertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
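For orientation, the following is a minimal sketch of what a per-model PyTorch test module looks like after this restructuring. The FooConfig / FooModel names are hypothetical placeholders and not part of this commit; ModelTesterMixin, ConfigTester, ids_tensor and require_torch are the helpers introduced or renamed here.

# tests/test_modeling_foo.py -- hypothetical example following the new layout
import unittest

from transformers import is_torch_available

from .test_configuration_common import ConfigTester
from .test_modeling_common import ModelTesterMixin, ids_tensor
from .utils import require_torch

if is_torch_available():
    from transformers import FooConfig, FooModel  # hypothetical model classes


@require_torch
class FooModelTest(ModelTesterMixin, unittest.TestCase):
    # The shared checks from ModelTesterMixin (save/load, attention outputs, pruning, ...)
    # now run as ordinary unittest methods because the class also inherits TestCase,
    # instead of going through the old CommonTestCases.CommonModelTester base class.
    all_model_classes = (FooModel,) if is_torch_available() else ()
    test_pruning = False  # opt out of checks that do not apply to this model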
tests/test_modeling_common.py (new file, mode 100644)
# coding=utf-8
# Copyright 2019 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import copy
import json
import logging
import os.path
import random
import shutil
import sys
import tempfile
import unittest
import uuid

from transformers import is_torch_available

from .utils import require_torch, slow, torch_device


if is_torch_available():
    import torch
    import numpy as np

    from transformers import (
        AdaptiveEmbedding,
        PretrainedConfig,
        PreTrainedModel,
        BertModel,
        BertConfig,
        BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
    )


if sys.version_info[0] == 2:

    class TemporaryDirectory(object):
        """Context manager for tempfile.mkdtemp() so it's usable with "with" statement."""

        def __enter__(self):
            self.name = tempfile.mkdtemp()
            return self.name

        def __exit__(self, exc_type, exc_value, traceback):
            shutil.rmtree(self.name)


else:
    TemporaryDirectory = tempfile.TemporaryDirectory
    unicode = str


def _config_zero_init(config):
    configs_no_init = copy.deepcopy(config)
    for key in configs_no_init.__dict__.keys():
        if "_range" in key or "_std" in key or "initializer_factor" in key:
            setattr(configs_no_init, key, 0.0)
    return configs_no_init


@require_torch
class ModelTesterMixin:

    model_tester = None
    all_model_classes = ()
    test_torchscript = True
    test_pruning = True
    test_resize_embeddings = True
    test_head_masking = True
    is_encoder_decoder = False

    def test_save_load(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**inputs_dict)
            out_2 = outputs[0].numpy()
            out_2[np.isnan(out_2)] = 0

            with TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                model = model_class.from_pretrained(tmpdirname)
                model.to(torch_device)
                with torch.no_grad():
                    after_outputs = model(**inputs_dict)

                # Make sure we don't have nans
                out_1 = after_outputs[0].cpu().numpy()
                out_1[np.isnan(out_1)] = 0
                max_diff = np.amax(np.abs(out_1 - out_2))
                self.assertLessEqual(max_diff, 1e-5)

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if param.requires_grad:
                    self.assertIn(
                        param.data.mean().item(),
                        [0.0, 1.0],
                        msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
                    )

    def test_determinism(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                first = model(**inputs_dict)[0]
                second = model(**inputs_dict)[0]
            out_1 = first.cpu().numpy()
            out_2 = second.cpu().numpy()
            out_1 = out_1[~np.isnan(out_1)]
            out_2 = out_2[~np.isnan(out_2)]
            max_diff = np.amax(np.abs(out_1 - out_2))
            self.assertLessEqual(max_diff, 1e-5)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        decoder_seq_length = (
            self.model_tester.decoder_seq_length
            if hasattr(self.model_tester, "decoder_seq_length")
            else self.model_tester.seq_length
        )
        encoder_seq_length = (
            self.model_tester.encoder_seq_length
            if hasattr(self.model_tester, "encoder_seq_length")
            else self.model_tester.seq_length
        )
        decoder_key_length = (
            self.model_tester.key_length if hasattr(self.model_tester, "key_length") else decoder_seq_length
        )
        encoder_key_length = (
            self.model_tester.key_length if hasattr(self.model_tester, "key_length") else encoder_seq_length
        )

        for model_class in self.all_model_classes:
            config.output_attentions = True
            config.output_hidden_states = False
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**inputs_dict)
            attentions = outputs[-1]
            self.assertEqual(model.config.output_attentions, True)
            self.assertEqual(model.config.output_hidden_states, False)
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )
            out_len = len(outputs)

            if self.is_encoder_decoder:
                self.assertEqual(out_len % 2, 0)
                decoder_attentions = outputs[(out_len // 2) - 1]
                self.assertEqual(model.config.output_attentions, True)
                self.assertEqual(model.config.output_hidden_states, False)
                self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(decoder_attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
                )

            # Check attention is always last and order is fine
            config.output_attentions = True
            config.output_hidden_states = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**inputs_dict)
            self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
            self.assertEqual(model.config.output_attentions, True)
            self.assertEqual(model.config.output_hidden_states, True)

            self_attentions = outputs[-1]
            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )

    def test_torchscript(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        self._create_and_check_torchscript(config, inputs_dict)

    def test_torchscript_output_attentions(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.output_attentions = True
        self._create_and_check_torchscript(config, inputs_dict)

    def test_torchscript_output_hidden_state(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.output_hidden_states = True
        self._create_and_check_torchscript(config, inputs_dict)

    def _create_and_check_torchscript(self, config, inputs_dict):
        if not self.test_torchscript:
            return

        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        configs_no_init.torchscript = True
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            model.to(torch_device)
            model.eval()
            inputs = inputs_dict["input_ids"]  # Let's keep only input_ids

            try:
                traced_gpt2 = torch.jit.trace(model, inputs)
            except RuntimeError:
                self.fail("Couldn't trace module.")

            with TemporaryDirectory() as tmp_dir_name:
                pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt")

                try:
                    torch.jit.save(traced_gpt2, pt_file_name)
                except Exception:
                    self.fail("Couldn't save module.")

                try:
                    loaded_model = torch.jit.load(pt_file_name)
                except Exception:
                    self.fail("Couldn't load module.")

            model.to(torch_device)
            model.eval()

            loaded_model.to(torch_device)
            loaded_model.eval()

            model_params = model.parameters()
            loaded_model_params = loaded_model.parameters()

            models_equal = True
            for p1, p2 in zip(model_params, loaded_model_params):
                if p1.data.ne(p2.data).sum() > 0:
                    models_equal = False

            self.assertTrue(models_equal)

    def test_headmasking(self):
        if not self.test_head_masking:
            return

        global_rng.seed(42)
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        global_rng.seed()

        config.output_attentions = True
        config.output_hidden_states = True
        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            model.to(torch_device)
            model.eval()

            # Prepare head_mask
            # Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
            head_mask = torch.ones(
                self.model_tester.num_hidden_layers, self.model_tester.num_attention_heads, device=torch_device
            )
            head_mask[0, 0] = 0
            head_mask[-1, :-1] = 0
            head_mask.requires_grad_(requires_grad=True)
            inputs = inputs_dict.copy()
            inputs["head_mask"] = head_mask

            outputs = model(**inputs)

            # Test that we can get a gradient back for importance score computation
            output = sum(t.sum() for t in outputs[0])
            output = output.sum()
            output.backward()
            multihead_outputs = head_mask.grad

            attentions = outputs[-1]
            hidden_states = outputs[-2]

            # Remove Nan
            for t in attentions:
                self.assertLess(
                    torch.sum(torch.isnan(t)), t.numel() / 4
                )  # Check we don't have more than 25% nans (arbitrary)
            attentions = [
                t.masked_fill(torch.isnan(t), 0.0) for t in attentions
            ]  # remove them (the test is less complete)

            self.assertIsNotNone(multihead_outputs)
            self.assertEqual(len(multihead_outputs), self.model_tester.num_hidden_layers)
            self.assertAlmostEqual(attentions[0][..., 0, :, :].flatten().sum().item(), 0.0)
            self.assertNotEqual(attentions[0][..., -1, :, :].flatten().sum().item(), 0.0)
            self.assertNotEqual(attentions[1][..., 0, :, :].flatten().sum().item(), 0.0)
            self.assertAlmostEqual(attentions[-1][..., -2, :, :].flatten().sum().item(), 0.0)
            self.assertNotEqual(attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)

    def test_head_pruning(self):
        if not self.test_pruning:
            return

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

            if "head_mask" in inputs_dict:
                del inputs_dict["head_mask"]

            config.output_attentions = True
            config.output_hidden_states = False
            model = model_class(config=config)
            model.to(torch_device)
            model.eval()
            heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)), -1: [0]}
            model.prune_heads(heads_to_prune)
            with torch.no_grad():
                outputs = model(**inputs_dict)

            attentions = outputs[-1]

            self.assertEqual(attentions[0].shape[-3], 1)
            self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
            self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)

    def test_head_pruning_save_load_from_pretrained(self):
        if not self.test_pruning:
            return

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

            if "head_mask" in inputs_dict:
                del inputs_dict["head_mask"]

            config.output_attentions = True
            config.output_hidden_states = False
            model = model_class(config=config)
            model.to(torch_device)
            model.eval()
            heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)), -1: [0]}
            model.prune_heads(heads_to_prune)

            with TemporaryDirectory() as temp_dir_name:
                model.save_pretrained(temp_dir_name)
                model = model_class.from_pretrained(temp_dir_name)
                model.to(torch_device)

            with torch.no_grad():
                outputs = model(**inputs_dict)
            attentions = outputs[-1]
            self.assertEqual(attentions[0].shape[-3], 1)
            self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
            self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)

    def test_head_pruning_save_load_from_config_init(self):
        if not self.test_pruning:
            return

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

            if "head_mask" in inputs_dict:
                del inputs_dict["head_mask"]

            config.output_attentions = True
            config.output_hidden_states = False

            heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)), -1: [0]}
            config.pruned_heads = heads_to_prune

            model = model_class(config=config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(**inputs_dict)
            attentions = outputs[-1]

            self.assertEqual(attentions[0].shape[-3], 1)
            self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
            self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)

    def test_head_pruning_integration(self):
        if not self.test_pruning:
            return

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

            if "head_mask" in inputs_dict:
                del inputs_dict["head_mask"]

            config.output_attentions = True
            config.output_hidden_states = False

            heads_to_prune = {0: [0], 1: [1, 2]}
            config.pruned_heads = heads_to_prune

            model = model_class(config=config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(**inputs_dict)
            attentions = outputs[-1]

            self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)
            self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads - 2)
            self.assertEqual(attentions[2].shape[-3], self.model_tester.num_attention_heads)
            self.assertEqual(attentions[3].shape[-3], self.model_tester.num_attention_heads)

            with TemporaryDirectory() as temp_dir_name:
                model.save_pretrained(temp_dir_name)
                model = model_class.from_pretrained(temp_dir_name)
                model.to(torch_device)

            with torch.no_grad():
                outputs = model(**inputs_dict)
            attentions = outputs[-1]

            self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)
            self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads - 2)
            self.assertEqual(attentions[2].shape[-3], self.model_tester.num_attention_heads)
            self.assertEqual(attentions[3].shape[-3], self.model_tester.num_attention_heads)

            heads_to_prune = {0: [0], 2: [1, 2]}
            model.prune_heads(heads_to_prune)

            with torch.no_grad():
                outputs = model(**inputs_dict)
            attentions = outputs[-1]

            self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)
            self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads - 2)
            self.assertEqual(attentions[2].shape[-3], self.model_tester.num_attention_heads - 2)
            self.assertEqual(attentions[3].shape[-3], self.model_tester.num_attention_heads)

            self.assertDictEqual(model.config.pruned_heads, {0: [0], 1: [1, 2], 2: [1, 2]})

    def test_hidden_states_output(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            config.output_hidden_states = True
            config.output_attentions = False
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**inputs_dict)
            hidden_states = outputs[-1]
            self.assertEqual(model.config.output_attentions, False)
            self.assertEqual(model.config.output_hidden_states, True)
            self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
            self.assertListEqual(
                list(hidden_states[0].shape[-2:]),
                [
                    self.model_tester.encoder_seq_length
                    if hasattr(self.model_tester, "encoder_seq_length")
                    else self.model_tester.seq_length,
                    self.model_tester.hidden_size,
                ],
            )

    def test_resize_tokens_embeddings(self):
        original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        if not self.test_resize_embeddings:
            return

        for model_class in self.all_model_classes:
            config = copy.deepcopy(original_config)
            model = model_class(config)

            model_vocab_size = config.vocab_size
            # Retrieve the embeddings and clone theme
            model_embed = model.resize_token_embeddings(model_vocab_size)
            cloned_embeddings = model_embed.weight.clone()

            # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
            model_embed = model.resize_token_embeddings(model_vocab_size + 10)
            self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
            # Check that it actually resizes the embeddings matrix
            self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)

            # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
            model_embed = model.resize_token_embeddings(model_vocab_size - 15)
            self.assertEqual(model.config.vocab_size, model_vocab_size - 15)
            # Check that it actually resizes the embeddings matrix
            self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)

            # Check that adding and removing tokens has not modified the first part of the embedding matrix.
            models_equal = True
            for p1, p2 in zip(cloned_embeddings, model_embed.weight):
                if p1.data.ne(p2.data).sum() > 0:
                    models_equal = False

            self.assertTrue(models_equal)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Embedding, AdaptiveEmbedding))
            model.set_input_embeddings(torch.nn.Embedding(10, 10))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, torch.nn.Linear))

    def test_tie_model_weights(self):
        if not self.test_torchscript:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        def check_same_values(layer_1, layer_2):
            equal = True
            for p1, p2 in zip(layer_1.weight, layer_2.weight):
                if p1.data.ne(p2.data).sum() > 0:
                    equal = False
            return equal

        for model_class in self.all_model_classes:
            config.torchscript = True
            model_not_tied = model_class(config)
            if model_not_tied.get_output_embeddings() is None:
                continue

            params_not_tied = list(model_not_tied.parameters())

            config_tied = copy.deepcopy(config)
            config_tied.torchscript = False
            model_tied = model_class(config_tied)
            params_tied = list(model_tied.parameters())

            # Check that the embedding layer and decoding layer are the same in size and in value
            self.assertGreater(len(params_not_tied), len(params_tied))

            # self.assertTrue(check_same_values(embeddings, decoding))

            # # Check that after modification, they remain the same.
            # embeddings.weight.data.div_(2)
            # # Check that the embedding layer and decoding layer are the same in size and in value
            # self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
            # self.assertTrue(check_same_values(embeddings, decoding))

            # # Check that after modification, they remain the same.
            # decoding.weight.data.div_(4)
            # # Check that the embedding layer and decoding layer are the same in size and in value
            # self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
            # self.assertTrue(check_same_values(embeddings, decoding))

            # Check that after resize they remain tied.
            model_tied.resize_token_embeddings(config.vocab_size + 10)
            params_tied_2 = list(model_tied.parameters())
            self.assertGreater(len(params_not_tied), len(params_tied))
            self.assertEqual(len(params_tied_2), len(params_tied))

            # decoding.weight.data.mul_(20)
            # # Check that the embedding layer and decoding layer are the same in size and in value
            # self.assertTrue(model.transformer.wte.weight.shape, model.lm_head.weight.shape)
            # self.assertTrue(check_same_values(model.transformer.wte, model.lm_head))

    def test_inputs_embeds(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        if not self.is_encoder_decoder:
            input_ids = inputs_dict["input_ids"]
            del inputs_dict["input_ids"]
        else:
            encoder_input_ids = inputs_dict["encoder_input_ids"]
            decoder_input_ids = inputs_dict["decoder_input_ids"]
            del inputs_dict["encoder_input_ids"]
            del inputs_dict["decoder_input_ids"]

        for model_class in self.all_model_classes:
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            wte = model.get_input_embeddings()
            if not self.is_encoder_decoder:
                inputs_dict["inputs_embeds"] = wte(input_ids)
            else:
                inputs_dict["encoder_inputs_embeds"] = wte(encoder_input_ids)
                inputs_dict["decoder_inputs_embeds"] = wte(decoder_input_ids)

            with torch.no_grad():
                outputs = model(**inputs_dict)


class ConfigTester(object):
    def __init__(self, parent, config_class=None, **kwargs):
        self.parent = parent
        self.config_class = config_class
        self.inputs_dict = kwargs

    def create_and_test_config_common_properties(self):
        config = self.config_class(**self.inputs_dict)
        self.parent.assertTrue(hasattr(config, "vocab_size"))
        self.parent.assertTrue(hasattr(config, "hidden_size"))
        self.parent.assertTrue(hasattr(config, "num_attention_heads"))
        self.parent.assertTrue(hasattr(config, "num_hidden_layers"))

    def create_and_test_config_to_json_string(self):
        config = self.config_class(**self.inputs_dict)
        obj = json.loads(config.to_json_string())
        for key, value in self.inputs_dict.items():
            self.parent.assertEqual(obj[key], value)

    def create_and_test_config_to_json_file(self):
        config_first = self.config_class(**self.inputs_dict)
        json_file_path = os.path.join(os.getcwd(), "config_" + str(uuid.uuid4()) + ".json")
        config_first.to_json_file(json_file_path)
        config_second = self.config_class.from_json_file(json_file_path)
        os.remove(json_file_path)
        self.parent.assertEqual(config_second.to_dict(), config_first.to_dict())

    def run_common_tests(self):
        self.create_and_test_config_common_properties()
        self.create_and_test_config_to_json_string()
        self.create_and_test_config_to_json_file()


global_rng = random.Random()


def ids_tensor(shape, vocab_size, rng=None, name=None):
    """Creates a random int32 tensor of the shape within the vocab size."""
    if rng is None:
        rng = global_rng

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.randint(0, vocab_size - 1))

    return torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous()


def floats_tensor(shape, scale=1.0, rng=None, name=None):
    """Creates a random float32 tensor of the shape within the vocab size."""
    if rng is None:
        rng = global_rng

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.random() * scale)

    return torch.tensor(data=values, dtype=torch.float, device=torch_device).view(shape).contiguous()


@require_torch
class ModelUtilsTest(unittest.TestCase):
    @slow
    def test_model_from_pretrained(self):
        logging.basicConfig(level=logging.INFO)
        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            config = BertConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, PretrainedConfig)

            model = BertModel.from_pretrained(model_name)
            model, loading_info = BertModel.from_pretrained(model_name, output_loading_info=True)
            self.assertIsNotNone(model)
            self.assertIsInstance(model, PreTrainedModel)
            for value in loading_info.values():
                self.assertEqual(len(value), 0)

            config = BertConfig.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
            model = BertModel.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
            self.assertEqual(model.config.output_attentions, True)
            self.assertEqual(model.config.output_hidden_states, True)
            self.assertEqual(model.config, config)
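ModelTesterMixin assumes each concrete test class provides a model_tester object whose prepare_config_and_inputs_for_common() returns a (config, inputs_dict) pair that every class in all_model_classes accepts, plus the size attributes the shared checks read (num_hidden_layers, num_attention_heads, hidden_size, seq_length). A minimal sketch of that contract follows; FooConfig and the FooModelTester name are hypothetical placeholders, not part of this commit.

# Hypothetical sketch of the model_tester contract used by ModelTesterMixin.
class FooModelTester(object):
    def __init__(self, parent, batch_size=2, seq_length=7, vocab_size=99):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.vocab_size = vocab_size
        self.num_hidden_layers = 2       # read by the attention / hidden-state checks
        self.num_attention_heads = 4
        self.hidden_size = 32

    def prepare_config_and_inputs_for_common(self):
        config = FooConfig(  # hypothetical config class
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
        )
        # ids_tensor is the helper defined above in tests/test_modeling_common.py
        inputs_dict = {"input_ids": ids_tensor([self.batch_size, self.seq_length], self.vocab_size)}
        return config, inputs_dict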
transformers/tests/modeling_ctrl_test.py → tests/test_modeling_ctrl.py

@@ -17,8 +17,8 @@ import unittest
 from transformers import is_torch_available

-from .configuration_common_test import ConfigTester
-from .modeling_common_test import CommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_common import ModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_torch, slow, torch_device

@@ -27,7 +27,7 @@ if is_torch_available():
 @require_torch
-class CTRLModelTest(CommonTestCases.CommonModelTester):
+class CTRLModelTest(ModelTesterMixin, unittest.TestCase):

     all_model_classes = (CTRLModel, CTRLLMHeadModel) if is_torch_available() else ()
     test_pruning = False
@@ -211,7 +211,3 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
         for model_name in list(CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = CTRLModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_distilbert_test.py → tests/test_modeling_distilbert.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import is_torch_available

-from .configuration_common_test import ConfigTester
-from .modeling_common_test import CommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_common import ModelTesterMixin, ids_tensor
 from .utils import require_torch, torch_device

@@ -35,7 +35,7 @@ if is_torch_available():
 @require_torch
-class DistilBertModelTest(CommonTestCases.CommonModelTester):
+class DistilBertModelTest(ModelTesterMixin, unittest.TestCase):

     all_model_classes = (
         (DistilBertModel, DistilBertForMaskedLM, DistilBertForQuestionAnswering, DistilBertForSequenceClassification)
@@ -250,7 +250,3 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
         # for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
         #     model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
         #     self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_encoder_decoder_test.py → tests/test_modeling_encoder_decoder.py

@@ -48,7 +48,3 @@ class EncoderDecoderModelTest(unittest.TestCase):
         with self.assertRaises(ValueError):
             _ = Model2Model.from_pretrained("does-not-exist")
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_gpt2_test.py → tests/test_modeling_gpt2.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import is_torch_available

-from .configuration_common_test import ConfigTester
-from .modeling_common_test import CommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_common import ModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_torch, slow, torch_device

@@ -34,7 +34,7 @@ if is_torch_available():
 @require_torch
-class GPT2ModelTest(CommonTestCases.CommonModelTester):
+class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):

     all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
@@ -248,7 +248,3 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
         for model_name in list(GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = GPT2Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_openai_test.py → tests/test_modeling_openai.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import is_torch_available

-from .configuration_common_test import ConfigTester
-from .modeling_common_test import CommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_common import ModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_torch, slow, torch_device

@@ -34,7 +34,7 @@ if is_torch_available():
 @require_torch
-class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
+class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase):

     all_model_classes = (
         (OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) if is_torch_available() else ()
@@ -205,7 +205,3 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
         for model_name in list(OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = OpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_roberta_test.py → tests/test_modeling_roberta.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import is_torch_available

-from .configuration_common_test import ConfigTester
-from .modeling_common_test import CommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_common import ModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_torch, slow, torch_device

@@ -37,7 +37,7 @@ if is_torch_available():
 @require_torch
-class RobertaModelTest(CommonTestCases.CommonModelTester):
+class RobertaModelTest(ModelTesterMixin, unittest.TestCase):

     all_model_classes = (RobertaForMaskedLM, RobertaModel) if is_torch_available() else ()
@@ -298,7 +298,3 @@ class RobertaModelIntegrationTest(unittest.TestCase):
         self.assertEqual(output.shape, expected_shape)
         expected_tensor = torch.Tensor([[-0.9469, 0.3913, 0.5118]])
         self.assertTrue(torch.allclose(output, expected_tensor, atol=1e-3))
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_t5_test.py → tests/test_modeling_t5.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import is_torch_available

-from .configuration_common_test import ConfigTester
-from .modeling_common_test import CommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_common import ModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_torch, slow

@@ -29,7 +29,7 @@ if is_torch_available():
 @require_torch
-class T5ModelTest(CommonTestCases.CommonModelTester):
+class T5ModelTest(ModelTesterMixin, unittest.TestCase):

     all_model_classes = (T5Model, T5WithLMHeadModel) if is_torch_available() else ()
     test_pruning = False
@@ -212,7 +212,3 @@ class T5ModelTest(CommonTestCases.CommonModelTester):
         for model_name in list(T5_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = T5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_tf_albert_test.py → tests/test_modeling_tf_albert.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import AlbertConfig, is_tf_available

-from .configuration_common_test import ConfigTester
-from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_tf, slow

@@ -33,7 +33,7 @@ if is_tf_available():
 @require_tf
-class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
+class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):

     all_model_classes = (
         (TFAlbertModel, TFAlbertForMaskedLM, TFAlbertForSequenceClassification) if is_tf_available() else ()
@@ -213,7 +213,3 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
         for model_name in list(TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = TFAlbertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_tf_auto_test.py → tests/test_modeling_tf_auto.py

@@ -99,7 +99,3 @@ class TFAutoModelTest(unittest.TestCase):
         logging.basicConfig(level=logging.INFO)
         model = TFAutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER)
         self.assertIsInstance(model, TFBertForMaskedLM)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_tf_bert_test.py → tests/test_modeling_tf_bert.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import BertConfig, is_tf_available

-from .configuration_common_test import ConfigTester
-from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_tf, slow

@@ -38,7 +38,7 @@ if is_tf_available():
 @require_tf
-class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
+class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):

     all_model_classes = (
         (
@@ -315,7 +315,3 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
         for model_name in ["bert-base-uncased"]:
             model = TFBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
tests/test_modeling_tf_common.py (new file, mode 100644)
# coding=utf-8
# Copyright 2019 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import copy
import os
import random
import shutil
import sys
import tempfile

from transformers import is_tf_available, is_torch_available

from .utils import require_tf


if is_tf_available():
    import tensorflow as tf
    import numpy as np

    # from transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP


if sys.version_info[0] == 2:

    class TemporaryDirectory(object):
        """Context manager for tempfile.mkdtemp() so it's usable with "with" statement."""

        def __enter__(self):
            self.name = tempfile.mkdtemp()
            return self.name

        def __exit__(self, exc_type, exc_value, traceback):
            shutil.rmtree(self.name)


else:
    TemporaryDirectory = tempfile.TemporaryDirectory
    unicode = str


def _config_zero_init(config):
    configs_no_init = copy.deepcopy(config)
    for key in configs_no_init.__dict__.keys():
        if "_range" in key or "_std" in key:
            setattr(configs_no_init, key, 0.0)
    return configs_no_init


@require_tf
class TFModelTesterMixin:

    model_tester = None
    all_model_classes = ()
    test_torchscript = True
    test_pruning = True
    test_resize_embeddings = True
    is_encoder_decoder = False

    def test_initialization(self):
        pass
        # config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        # configs_no_init = _config_zero_init(config)
        # for model_class in self.all_model_classes:
        #     model = model_class(config=configs_no_init)
        #     for name, param in model.named_parameters():
        #         if param.requires_grad:
        #             self.assertIn(param.data.mean().item(), [0.0, 1.0],
        #                           msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))

    def test_save_load(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            outputs = model(inputs_dict)

            with TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                model = model_class.from_pretrained(tmpdirname)
                after_outputs = model(inputs_dict)

                # Make sure we don't have nans
                out_1 = after_outputs[0].numpy()
                out_2 = outputs[0].numpy()
                out_1 = out_1[~np.isnan(out_1)]
                out_2 = out_2[~np.isnan(out_2)]
                max_diff = np.amax(np.abs(out_1 - out_2))
                self.assertLessEqual(max_diff, 1e-5)

    def test_pt_tf_model_equivalence(self):
        if not is_torch_available():
            return

        import torch
        import transformers

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beggining
            pt_model_class = getattr(transformers, pt_model_class_name)

            config.output_hidden_states = True
            tf_model = model_class(config)
            pt_model = pt_model_class(config)

            # Check we can load pt model in tf and vice-versa with model => model functions
            tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=inputs_dict)
            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

            # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
            )
            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(inputs_dict, training=False)
            tf_hidden_states = tfo[0].numpy()
            pt_hidden_states = pto[0].numpy()
            tf_hidden_states[np.isnan(tf_hidden_states)] = 0
            pt_hidden_states[np.isnan(pt_hidden_states)] = 0
            max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
            self.assertLessEqual(max_diff, 2e-2)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)

            # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
            )
            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(inputs_dict)
            tfo = tfo[0].numpy()
            pto = pto[0].numpy()
            tfo[np.isnan(tfo)] = 0
            pto[np.isnan(pto)] = 0
            max_diff = np.amax(np.abs(tfo - pto))
            self.assertLessEqual(max_diff, 2e-2)

    def test_compile_tf_model(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        if self.is_encoder_decoder:
            input_ids = {
                "decoder_input_ids": tf.keras.Input(batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"),
                "encoder_input_ids": tf.keras.Input(batch_shape=(2, 2000), name="encoder_input_ids", dtype="int32"),
            }
        else:
            input_ids = tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32")
        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")

        for model_class in self.all_model_classes:
            # Prepare our model
            model = model_class(config)

            # Let's load it from the disk to be sure we can use pretrained weights
            with TemporaryDirectory() as tmpdirname:
                outputs = model(inputs_dict)  # build the model
                model.save_pretrained(tmpdirname)
                model = model_class.from_pretrained(tmpdirname)

            outputs_dict = model(input_ids)
            hidden_states = outputs_dict[0]

            # Add a dense layer on top to test intetgration with other keras modules
            outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)

            # Compile extended model
            extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
            extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_keyword_and_dict_args(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            outputs_dict = model(inputs_dict)

            inputs_keywords = copy.deepcopy(inputs_dict)
            input_ids = inputs_keywords.pop("input_ids" if not self.is_encoder_decoder else "decoder_input_ids", None)
            outputs_keywords = model(input_ids, **inputs_keywords)
            output_dict = outputs_dict[0].numpy()
            output_keywords = outputs_keywords[0].numpy()

            self.assertLess(np.sum(np.abs(output_dict - output_keywords)), 1e-6)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        decoder_seq_length = (
            self.model_tester.decoder_seq_length
            if hasattr(self.model_tester, "decoder_seq_length")
            else self.model_tester.seq_length
        )
        encoder_seq_length = (
            self.model_tester.encoder_seq_length
            if hasattr(self.model_tester, "encoder_seq_length")
            else self.model_tester.seq_length
        )
        decoder_key_length = (
            self.model_tester.key_length if hasattr(self.model_tester, "key_length") else decoder_seq_length
        )
        encoder_key_length = (
            self.model_tester.key_length if hasattr(self.model_tester, "key_length") else encoder_seq_length
        )

        for model_class in self.all_model_classes:
            config.output_attentions = True
            config.output_hidden_states = False
            model = model_class(config)
            outputs = model(inputs_dict)
            attentions = [t.numpy() for t in outputs[-1]]
            self.assertEqual(model.config.output_attentions, True)
            self.assertEqual(model.config.output_hidden_states, False)
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )
            out_len = len(outputs)

            if self.is_encoder_decoder:
                self.assertEqual(out_len % 2, 0)
                decoder_attentions = outputs[(out_len // 2) - 1]
                self.assertEqual(model.config.output_attentions, True)
                self.assertEqual(model.config.output_hidden_states, False)
                self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(decoder_attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
                )

            # Check attention is always last and order is fine
            config.output_attentions = True
            config.output_hidden_states = True
            model = model_class(config)
            outputs = model(inputs_dict)
            self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
            self.assertEqual(model.config.output_attentions, True)
            self.assertEqual(model.config.output_hidden_states, True)

            attentions = [t.numpy() for t in outputs[-1]]
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )

    def test_hidden_states_output(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            config.output_hidden_states = True
            config.output_attentions = False
            model = model_class(config)
            outputs = model(inputs_dict)
            hidden_states = [t.numpy() for t in outputs[-1]]
            self.assertEqual(model.config.output_attentions, False)
            self.assertEqual(model.config.output_hidden_states, True)
            self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
            self.assertListEqual(
                list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size]
            )

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_embeddings()
            assert x is None or isinstance(x, tf.keras.layers.Layer)

    def test_determinism(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            first, second = model(inputs_dict, training=False)[0], model(inputs_dict, training=False)[0]
            out_1 = first.numpy()
            out_2 = second.numpy()
            out_1 = out_1[~np.isnan(out_1)]
            out_2 = out_2[~np.isnan(out_2)]
            max_diff = np.amax(np.abs(out_1 - out_2))
            self.assertLessEqual(max_diff, 1e-5)

    def _get_embeds(self, wte, input_ids):
        # ^^ In our TF models, the input_embeddings can take slightly different forms,
        # so we try a few of them.
        # We used to fall back to just synthetically creating a dummy tensor of ones:
        try:
            x = wte(input_ids, mode="embedding")
        except Exception:
            try:
                x = wte([input_ids], mode="embedding")
            except Exception:
                try:
                    x = wte([input_ids, None, None, None], mode="embedding")
                except Exception:
                    if hasattr(self.model_tester, "embedding_size"):
                        x = tf.ones(input_ids.shape + [self.model_tester.embedding_size], dtype=tf.dtypes.float32)
                    else:
                        x = tf.ones(input_ids.shape + [self.model_tester.hidden_size], dtype=tf.dtypes.float32)
        return x

    def test_inputs_embeds(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        if not self.is_encoder_decoder:
            input_ids = inputs_dict["input_ids"]
            del inputs_dict["input_ids"]
        else:
            encoder_input_ids = inputs_dict["encoder_input_ids"]
            decoder_input_ids = inputs_dict["decoder_input_ids"]
            del inputs_dict["encoder_input_ids"]
            del inputs_dict["decoder_input_ids"]

        for model_class in self.all_model_classes:
            model = model_class(config)

            wte = model.get_input_embeddings()
            if not self.is_encoder_decoder:
                inputs_dict["inputs_embeds"] = self._get_embeds(wte, input_ids)
            else:
                inputs_dict["encoder_inputs_embeds"] = self._get_embeds(wte, encoder_input_ids)
                inputs_dict["decoder_inputs_embeds"] = self._get_embeds(wte, decoder_input_ids)

            outputs = model(inputs_dict)


def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):
    """Creates a random int32 tensor of the shape within the vocab size."""
    if rng is None:
        rng = random.Random()

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.randint(0, vocab_size - 1))

    output = tf.constant(values, shape=shape, dtype=dtype if dtype is not None else tf.int32)

    return output
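The TF mixin follows the same pattern as the PyTorch one but calls models Keras-style with the inputs dict rather than **kwargs, and adds PT/TF equivalence and tf.keras compile checks. Below is a minimal sketch of a TF test class wired to it; TFFooModel is a hypothetical placeholder, not part of this commit.

# Hypothetical sketch of a per-model TF test using the new TFModelTesterMixin.
import unittest

from transformers import is_tf_available

from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
from .utils import require_tf

if is_tf_available():
    from transformers import TFFooModel  # hypothetical TF model class


@require_tf
class TFFooModelTest(TFModelTesterMixin, unittest.TestCase):
    all_model_classes = (TFFooModel,) if is_tf_available() else ()
    test_pruning = False  # head pruning is not exercised for TF models here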
transformers/tests/modeling_tf_ctrl_test.py → tests/test_modeling_tf_ctrl.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import CTRLConfig, is_tf_available

-from .configuration_common_test import ConfigTester
-from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_tf, slow

@@ -28,7 +28,7 @@ if is_tf_available():
 @require_tf
-class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
+class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase):

     all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else ()
@@ -201,7 +201,3 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
         for model_name in list(TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = TFCTRLModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_tf_distilbert_test.py → tests/test_modeling_tf_distilbert.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import DistilBertConfig, is_tf_available

-from .configuration_common_test import ConfigTester
-from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 from .utils import require_tf

@@ -33,7 +33,7 @@ if is_tf_available():
 @require_tf
-class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
+class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):

     all_model_classes = (
         (
@@ -221,7 +221,3 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
         # for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
         #     model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
         #     self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_tf_gpt2_test.py → tests/test_modeling_tf_gpt2.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import GPT2Config, is_tf_available

-from .configuration_common_test import ConfigTester
-from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_tf, slow

@@ -34,7 +34,7 @@ if is_tf_available():
 @require_tf
-class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
+class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):

     all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel) if is_tf_available() else ()
     # all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel) if is_tf_available() else ()
@@ -234,7 +234,3 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
         for model_name in list(TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = TFGPT2Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_tf_openai_gpt_test.py → tests/test_modeling_tf_openai_gpt.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import OpenAIGPTConfig, is_tf_available

-from .configuration_common_test import ConfigTester
-from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_tf, slow

@@ -34,7 +34,7 @@ if is_tf_available():
 @require_tf
-class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
+class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):

     all_model_classes = (
         (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else ()
@@ -235,7 +235,3 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
         for model_name in list(TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = TFOpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_tf_roberta_test.py → tests/test_modeling_tf_roberta.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import RobertaConfig, is_tf_available

-from .configuration_common_test import ConfigTester
-from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_tf, slow

@@ -36,7 +36,7 @@ if is_tf_available():
 @require_tf
-class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
+class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):

     all_model_classes = (
         (TFRobertaModel, TFRobertaForMaskedLM, TFRobertaForSequenceClassification) if is_tf_available() else ()
@@ -244,7 +244,3 @@ class TFRobertaModelIntegrationTest(unittest.TestCase):
         self.assertEqual(list(output.numpy().shape), expected_shape)
         expected_tensor = tf.constant([[-0.9469, 0.3913, 0.5118]])
         self.assertTrue(numpy.allclose(output.numpy(), expected_tensor.numpy(), atol=1e-3))
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_tf_t5_test.py → tests/test_modeling_tf_t5.py

@@ -18,8 +18,8 @@ import unittest
 from transformers import T5Config, is_tf_available

-from .configuration_common_test import ConfigTester
-from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_tf, slow

@@ -28,7 +28,7 @@ if is_tf_available():
 @require_tf
-class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester):
+class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):

     is_encoder_decoder = True
     all_model_classes = (TFT5Model, TFT5WithLMHeadModel) if is_tf_available() else ()
@@ -165,7 +165,3 @@ class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester):
         for model_name in ["t5-small"]:
             model = TFT5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()
transformers/tests/modeling_tf_transfo_xl_test.py → tests/test_modeling_tf_transfo_xl.py

@@ -19,8 +19,8 @@ import unittest
 from transformers import TransfoXLConfig, is_tf_available

-from .configuration_common_test import ConfigTester
-from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
+from .test_configuration_common import ConfigTester
+from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 from .utils import CACHE_DIR, require_tf, slow

@@ -34,7 +34,7 @@ if is_tf_available():
 @require_tf
-class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
+class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):

     all_model_classes = (TFTransfoXLModel, TFTransfoXLLMHeadModel) if is_tf_available() else ()
     test_pruning = False
@@ -207,7 +207,3 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
         for model_name in list(TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
             model = TFTransfoXLModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
-
-
-if __name__ == "__main__":
-    unittest.main()