chenpangpang / transformers · Commits

Commit a75c64d8
Authored Aug 26, 2020 by Lysandre
Parent: e78c1103

Black 20 release
Changes: 191 files in the full commit. This page shows 20 changed files with 228 additions and 67 deletions (+228 −67).
tests/test_modeling_longformer.py      +2 −1
tests/test_modeling_mobilebert.py      +9 −2
tests/test_modeling_openai.py          +2 −1
tests/test_modeling_reformer.py        +70 −22
tests/test_modeling_roberta.py         +4 −3
tests/test_modeling_t5.py              +73 −12
tests/test_modeling_tf_camembert.py    +4 −2
tests/test_modeling_tf_common.py       +24 −7
tests/test_modeling_tf_ctrl.py         +2 −1
tests/test_modeling_tf_distilbert.py   +2 −1
tests/test_modeling_tf_electra.py      +2 −1
tests/test_modeling_tf_flaubert.py     +4 −2
tests/test_modeling_tf_gpt2.py         +2 −1
tests/test_modeling_tf_longformer.py   +12 −4
tests/test_modeling_tf_openai.py       +2 −1
tests/test_modeling_tf_roberta.py      +2 −1
tests/test_modeling_tf_t5.py           +6 −2
tests/test_modeling_tf_transfo_xl.py   +2 −1
tests/test_modeling_tf_xlm.py          +2 −1
tests/test_modeling_tf_xlnet.py        +2 −1
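Nearly every hunk below is the same mechanical change. The commit re-formats the test suite with black 20.8, whose new "magic trailing comma" rule treats a trailing comma inside any bracket pair as a request to explode that pair, one element per line; black 19.x would leave such calls on a single line. A minimal sketch of the rule, using a hypothetical stand-in function rather than code from this commit:

    def model(*args, **kwargs):  # hypothetical stand-in for a transformers model call
        return args, kwargs

    # black 19.x left this on one line, trailing comma and all:
    result = model(1, attention_mask=2, token_type_ids=3,)

    # black 20.8 treats the trailing comma as a request to explode the call:
    result = model(
        1,
        attention_mask=2,
        token_type_ids=3,
    )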
tests/test_modeling_longformer.py

@@ -40,7 +40,8 @@ if is_torch_available():
 class LongformerModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_mobilebert.py

@@ -217,7 +217,10 @@ class MobileBertModelTester:
         model.to(torch_device)
         model.eval()
         result = model(
-            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, next_sentence_label=sequence_labels,
+            input_ids,
+            attention_mask=input_mask,
+            token_type_ids=token_type_ids,
+            next_sentence_label=sequence_labels,
         )
         self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))

@@ -397,7 +400,11 @@ class MobileBertModelTest(ModelTesterMixin, unittest.TestCase):
 def _long_tensor(tok_lst):
-    return torch.tensor(tok_lst, dtype=torch.long, device=torch_device,)
+    return torch.tensor(
+        tok_lst,
+        dtype=torch.long,
+        device=torch_device,
+    )

 TOLERANCE = 1e-3
tests/test_modeling_openai.py

@@ -37,7 +37,8 @@ if is_torch_available():
 class OpenAIGPTModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_reformer.py

@@ -240,14 +240,19 @@ class ReformerModelTester:
         half_input_ids = input_ids[:, :half_seq_len]

         # normal padded
-        attn_mask = torch.cat([torch.ones_like(half_input_ids), torch.zeros_like(half_input_ids)], dim=-1,)
+        attn_mask = torch.cat(
+            [torch.ones_like(half_input_ids), torch.zeros_like(half_input_ids)],
+            dim=-1,
+        )
         input_ids_padded = torch.cat(
-            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)], dim=-1,
+            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)],
+            dim=-1,
         )

         # shifted padded
         input_ids_roll = torch.cat(
-            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)], dim=-1,
+            [half_input_ids, ids_tensor((self.batch_size, half_seq_len), self.vocab_size)],
+            dim=-1,
         )
         input_ids_roll = torch.roll(input_ids_roll, roll, dims=-1)
         attn_mask_roll = torch.roll(attn_mask, roll, dims=-1)

@@ -283,13 +288,21 @@ class ReformerModelTester:
         torch.manual_seed(layer.attention_seed)
         attn_outputs = layer.attention(hidden_states, attention_mask=input_mask)
         self.parent.assertTrue(
-            torch.allclose(prev_attn_output + attn_outputs.hidden_states, next_attn_output, atol=1e-3,)
+            torch.allclose(
+                prev_attn_output + attn_outputs.hidden_states,
+                next_attn_output,
+                atol=1e-3,
+            )
         )
         torch.manual_seed(layer.feed_forward_seed)
         feed_forward_hidden_states = layer.feed_forward(next_attn_output)
         self.parent.assertTrue(
-            torch.allclose(next_hidden_states, hidden_states + feed_forward_hidden_states, atol=1e-3,)
+            torch.allclose(
+                next_hidden_states,
+                hidden_states + feed_forward_hidden_states,
+                atol=1e-3,
+            )
         )

     def create_and_check_reformer_feed_backward_chunking(self, config, input_ids, input_mask, choice_labels):

@@ -416,7 +429,10 @@ class ReformerModelTester:
         model.to(torch_device)
         model.eval()
         result = model(
-            input_ids, attention_mask=input_mask, start_positions=choice_labels, end_positions=choice_labels,
+            input_ids,
+            attention_mask=input_mask,
+            start_positions=choice_labels,
+            end_positions=choice_labels,
         )
         self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
         self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

@@ -887,7 +903,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         reformer_output = layer(prev_attn_output=hidden_states.clone(), hidden_states=hidden_states)
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.6879, -1.3083, -0.4708, 1.3555, -0.6292], dtype=torch.float, device=torch_device,
+            [1.6879, -1.3083, -0.4708, 1.3555, -0.6292],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -902,11 +920,15 @@ class ReformerIntegrationTests(unittest.TestCase):
         layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
         layer.eval()
         reformer_output = layer(
-            prev_attn_output=hidden_states.clone(), hidden_states=hidden_states, attention_mask=attn_mask,
+            prev_attn_output=hidden_states.clone(),
+            hidden_states=hidden_states,
+            attention_mask=attn_mask,
         )
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.6439, -1.2306, -0.5108, 1.3006, -0.6537], dtype=torch.float, device=torch_device,
+            [1.6439, -1.2306, -0.5108, 1.3006, -0.6537],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -922,7 +944,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         reformer_output = layer(prev_attn_output=hidden_states, hidden_states=hidden_states)
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.4212, -2.0576, -0.9688, 1.4599, -0.1344], dtype=torch.float, device=torch_device,
+            [1.4212, -2.0576, -0.9688, 1.4599, -0.1344],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -935,10 +959,16 @@ class ReformerIntegrationTests(unittest.TestCase):
         torch.manual_seed(0)
         layer = ReformerLayer(ReformerConfig(**config)).to(torch_device)
         layer.eval()
-        reformer_output = layer(prev_attn_output=hidden_states, hidden_states=hidden_states, attention_mask=attn_mask,)
+        reformer_output = layer(
+            prev_attn_output=hidden_states,
+            hidden_states=hidden_states,
+            attention_mask=attn_mask,
+        )
         output_slice = reformer_output.hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [1.4750, -2.0235, -0.9743, 1.4463, -0.1269], dtype=torch.float, device=torch_device,
+            [1.4750, -2.0235, -0.9743, 1.4463, -0.1269],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -953,7 +983,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
         output_slice = hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [-0.9896, -0.9396, -1.0831, -0.0597, 0.2456], dtype=torch.float, device=torch_device,
+            [-0.9896, -0.9396, -1.0831, -0.0597, 0.2456],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -967,7 +999,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
         output_slice = hidden_states[0, 0, :5]
         expected_output_slice = torch.tensor(
-            [-1.6791, 0.7171, 0.1594, 0.4063, 1.2584], dtype=torch.float, device=torch_device,
+            [-1.6791, 0.7171, 0.1594, 0.4063, 1.2584],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -983,7 +1017,9 @@ class ReformerIntegrationTests(unittest.TestCase):
         hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
         output_slice = hidden_states[1, -1, :5]
         expected_output_slice = torch.tensor(
-            [0.0256, -0.0121, 0.0636, 0.0024, -0.0393], dtype=torch.float, device=torch_device,
+            [0.0256, -0.0121, 0.0636, 0.0024, -0.0393],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))

@@ -1005,15 +1041,21 @@ class ReformerIntegrationTests(unittest.TestCase):
         # check last grads to cover all proable errors
         grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
         expected_grad_slice_word = torch.tensor(
-            [-0.0005, 0.0001, 0.0002, 0.0003, 0.0006], dtype=torch.float, device=torch_device,
+            [-0.0005, 0.0001, 0.0002, 0.0003, 0.0006],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
         expected_grad_slice_pos_fac_1 = torch.tensor(
-            [0.0037, -1.3793, -1.0231, -1.5230, -2.5306], dtype=torch.float, device=torch_device,
+            [0.0037, -1.3793, -1.0231, -1.5230, -2.5306],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
         expected_grad_slice_pos_fac_2 = torch.tensor(
-            [-1.3165, 0.5168, 0.7785, 1.0811, -0.9830], dtype=torch.float, device=torch_device,
+            [-1.3165, 0.5168, 0.7785, 1.0811, -0.9830],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(grad_slice_word, expected_grad_slice_word, atol=1e-3))
         self.assertTrue(torch.allclose(grad_slice_position_factor_1, expected_grad_slice_pos_fac_1, atol=1e-3))

@@ -1038,15 +1080,21 @@ class ReformerIntegrationTests(unittest.TestCase):
         # check last grads to cover all proable errors
         grad_slice_word = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
         expected_grad_slice_word = torch.tensor(
-            [2.6357e-05, 4.3358e-04, -8.4985e-04, 1.0094e-04, 3.8954e-04], dtype=torch.float, device=torch_device,
+            [2.6357e-05, 4.3358e-04, -8.4985e-04, 1.0094e-04, 3.8954e-04],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_1 = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
         expected_grad_slice_pos_fac_1 = torch.tensor(
-            [-0.0984, 0.6283, 0.4282, 1.2960, 0.6897], dtype=torch.float, device=torch_device,
+            [-0.0984, 0.6283, 0.4282, 1.2960, 0.6897],
+            dtype=torch.float,
+            device=torch_device,
         )
         grad_slice_position_factor_2 = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
         expected_grad_slice_pos_fac_2 = torch.tensor(
-            [0.4626, -0.0231, -0.0172, 0.1081, 0.3805], dtype=torch.float, device=torch_device,
+            [0.4626, -0.0231, -0.0172, 0.1081, 0.3805],
+            dtype=torch.float,
+            device=torch_device,
         )
         self.assertTrue(torch.allclose(grad_slice_word, expected_grad_slice_word, atol=1e-3))
         self.assertTrue(torch.allclose(grad_slice_position_factor_1, expected_grad_slice_pos_fac_1, atol=1e-3))
tests/test_modeling_roberta.py

@@ -45,7 +45,8 @@ if is_torch_available():
 class RobertaModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13

@@ -352,7 +353,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertIsNotNone(model)

     def test_create_position_ids_respects_padding_index(self):
-        """ Ensure that the default position ids only assign a sequential . This is a regression
+        """Ensure that the default position ids only assign a sequential . This is a regression
         test for https://github.com/huggingface/transformers/issues/1761

         The position ids should be masked with the embedding object's padding index. Therefore, the

@@ -371,7 +372,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))

     def test_create_position_ids_from_inputs_embeds(self):
-        """ Ensure that the default position ids only assign a sequential . This is a regression
+        """Ensure that the default position ids only assign a sequential . This is a regression
         test for https://github.com/huggingface/transformers/issues/1761

         The position ids should be masked with the embedding object's padding index. Therefore, the
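The two docstring hunks above reflect another black 20.8 change: docstrings are now reformatted as well, and the space black 19.x tolerated after the opening triple quotes is stripped. A tiny sketch with a hypothetical function:

    def check_old():
        """ Ensure something holds."""  # black 19.x accepted the leading space

    def check_new():
        """Ensure something holds."""  # black 20.8 removes it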
tests/test_modeling_t5.py

@@ -101,7 +101,13 @@ class T5ModelTester:
         )

     def check_prepare_lm_labels_via_shift_left(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config)
         model.to(torch_device)

@@ -134,7 +140,13 @@ class T5ModelTester:
         self.parent.assertListEqual(decoder_input_ids_slice[1:].tolist(), lm_labels_slice[:-1].tolist())

     def create_and_check_model(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config)
         model.to(torch_device)

@@ -160,7 +172,13 @@ class T5ModelTester:
         self.parent.assertEqual(len(decoder_past[1][0]), 4)

     def create_and_check_with_lm_head(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5ForConditionalGeneration(config=config).to(torch_device).eval()
         outputs = model(

@@ -174,7 +192,13 @@ class T5ModelTester:
         self.parent.assertEqual(outputs["loss"].size(), ())

     def create_and_check_decoder_model_past(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config).get_decoder().to(torch_device).eval()
         # first forward pass

@@ -205,7 +229,13 @@ class T5ModelTester:
         self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

     def create_and_check_decoder_model_attention_mask_past(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config).get_decoder()
         model.to(torch_device)

@@ -231,7 +261,8 @@ class T5ModelTester:
         # append to next input_ids and attn_mask
         next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
         attn_mask = torch.cat(
-            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1,
+            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)],
+            dim=1,
         )

         # get two different outputs

@@ -249,7 +280,13 @@ class T5ModelTester:
         self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

     def create_and_check_generate_with_past_key_value_states(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5ForConditionalGeneration(config=config).to(torch_device).eval()
         torch.manual_seed(0)

@@ -261,14 +298,26 @@ class T5ModelTester:
         self.parent.assertTrue(torch.all(output_with_past_cache == output_without_past_cache))

     def create_and_check_model_fp16_forward(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         model = T5Model(config=config).to(torch_device).half().eval()
         output = model(input_ids, decoder_input_ids=input_ids, attention_mask=attention_mask)["last_hidden_state"]
         self.parent.assertFalse(torch.isnan(output).any().item())

     def create_and_check_encoder_decoder_shared_weights(
-        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
+        self,
+        config,
+        input_ids,
+        decoder_input_ids,
+        attention_mask,
+        decoder_attention_mask,
+        lm_labels,
     ):
         for model_class in [T5Model, T5ForConditionalGeneration]:
             torch.manual_seed(0)

@@ -339,7 +388,14 @@ class T5ModelTester:
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
-        (config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,) = config_and_inputs
+        (
+            config,
+            input_ids,
+            decoder_input_ids,
+            attention_mask,
+            decoder_attention_mask,
+            lm_labels,
+        ) = config_and_inputs

         inputs_dict = {
             "input_ids": input_ids,

@@ -412,7 +468,11 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
         model = T5Model(config_and_inputs[0]).to(torch_device)
         with tempfile.TemporaryDirectory() as tmpdirname:
             torch.onnx.export(
-                model, config_and_inputs[1], f"{tmpdirname}/t5_test.onnx", export_params=True, opset_version=9,
+                model,
+                config_and_inputs[1],
+                f"{tmpdirname}/t5_test.onnx",
+                export_params=True,
+                opset_version=9,
             )

@@ -469,7 +529,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
         )
         decoded = tok.batch_decode(hypotheses_batch, skip_special_tokens=True, clean_up_tokenization_spaces=False)
         self.assertListEqual(
-            expected_summaries, decoded,
+            expected_summaries,
+            decoded,
         )

     @slow
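The prepare_config_and_inputs_for_common hunk above shows the trailing-comma rule is not limited to call sites: a parenthesized assignment target with a trailing comma is exploded the same way. A sketch with stand-in values (prepare is hypothetical, standing in for self.prepare_config_and_inputs()):

    def prepare():  # hypothetical stand-in
        return "config", "input_ids", "lm_labels"

    # black 19.x output:
    (config, input_ids, lm_labels,) = prepare()

    # black 20.8 output:
    (
        config,
        input_ids,
        lm_labels,
    ) = prepare()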
tests/test_modeling_tf_camembert.py

@@ -33,7 +33,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
         model = TFCamembertModel.from_pretrained("jplu/tf-camembert-base")
         input_ids = tf.convert_to_tensor(
-            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]], dtype=tf.int32,
+            [[5, 121, 11, 660, 16, 730, 25543, 110, 83, 6]],
+            dtype=tf.int32,
         )  # J'aime le camembert !"
         output = model(input_ids)["last_hidden_state"]

@@ -41,7 +42,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
         self.assertEqual(output.shape, expected_shape)
         # compare the actual values for a slice.
         expected_slice = tf.convert_to_tensor(
-            [[[-0.0254, 0.0235, 0.1027], [0.0606, -0.1811, -0.0418], [-0.1561, -0.1127, 0.2687]]], dtype=tf.float32,
+            [[[-0.0254, 0.0235, 0.1027], [0.0606, -0.1811, -0.0418], [-0.1561, -0.1127, 0.2687]]],
+            dtype=tf.float32,
         )
         # camembert = torch.hub.load('pytorch/fairseq', 'camembert.v0')
         # camembert.eval()
tests/test_modeling_tf_common.py

@@ -155,7 +155,8 @@ class TFModelTesterMixin:
             self.assertEqual(len(outputs), num_out)
             self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
             self.assertListEqual(
-                list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size],
+                list(hidden_states[0].shape[-2:]),
+                [self.model_tester.seq_length, self.model_tester.hidden_size],
             )

     @slow

@@ -486,7 +487,8 @@ class TFModelTesterMixin:
             hidden_states = [t.numpy() for t in outputs[-1]]
             self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
             self.assertListEqual(
-                list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size],
+                list(hidden_states[0].shape[-2:]),
+                [self.model_tester.seq_length, self.model_tester.hidden_size],
             )

         for model_class in self.all_model_classes:

@@ -591,9 +593,15 @@ class TFModelTesterMixin:
                 x = wte([input_ids, None, None, None], mode="embedding")
             except Exception:
                 if hasattr(self.model_tester, "embedding_size"):
-                    x = tf.ones(input_ids.shape + [self.model_tester.embedding_size], dtype=tf.dtypes.float32,)
+                    x = tf.ones(
+                        input_ids.shape + [self.model_tester.embedding_size],
+                        dtype=tf.dtypes.float32,
+                    )
                 else:
-                    x = tf.ones(input_ids.shape + [self.model_tester.hidden_size], dtype=tf.dtypes.float32,)
+                    x = tf.ones(
+                        input_ids.shape + [self.model_tester.hidden_size],
+                        dtype=tf.dtypes.float32,
+                    )
             return x

     def test_inputs_embeds(self):

@@ -700,7 +708,14 @@ class TFModelTesterMixin:
             model.generate(input_ids, do_sample=False, num_return_sequences=3, num_beams=2)

             # num_return_sequences > 1, sample
-            self._check_generated_ids(model.generate(input_ids, do_sample=True, num_beams=2, num_return_sequences=2,))
+            self._check_generated_ids(
+                model.generate(
+                    input_ids,
+                    do_sample=True,
+                    num_beams=2,
+                    num_return_sequences=2,
+                )
+            )

             # num_return_sequences > 1, greedy
             self._check_generated_ids(model.generate(input_ids, do_sample=False, num_beams=2, num_return_sequences=2))

@@ -895,7 +910,8 @@ class UtilsFunctionsTest(unittest.TestCase):
         )
         non_inf_expected_idx = tf.convert_to_tensor(
-            [[0, 0], [0, 9], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 18], [1, 20], [1, 27]], dtype=tf.int32,
+            [[0, 0], [0, 9], [0, 10], [0, 25], [0, 26], [1, 13], [1, 17], [1, 18], [1, 20], [1, 27]],
+            dtype=tf.int32,
         )  # expected non filtered idx as noted above

         non_inf_expected_output = tf.convert_to_tensor(

@@ -907,7 +923,8 @@ class UtilsFunctionsTest(unittest.TestCase):
         non_inf_output = output[output != -float("inf")]
         non_inf_idx = tf.cast(
-            tf.where(tf.not_equal(output, tf.constant(-float("inf"), dtype=tf.float32))), dtype=tf.int32,
+            tf.where(tf.not_equal(output, tf.constant(-float("inf"), dtype=tf.float32))),
+            dtype=tf.int32,
         )

         tf.debugging.assert_near(non_inf_output, non_inf_expected_output, rtol=1e-12)
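The generate hunk above also shows how the rule propagates outward through nesting: the trailing comma sits in the inner model.generate(...) call, so that call explodes, and the now-multiline argument forces the enclosing self._check_generated_ids(...) call open as well. A sketch with hypothetical stand-ins for the mixin's helpers:

    def check(ids):  # stand-in for self._check_generated_ids
        return ids

    def generate(ids, **kwargs):  # stand-in for model.generate
        return ids

    # source: check(generate([0, 1], do_sample=True, num_beams=2,))
    # black 20.8 output, one bracket level per indent:
    check(
        generate(
            [0, 1],
            do_sample=True,
            num_beams=2,
        )
    )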
tests/test_modeling_tf_ctrl.py

@@ -31,7 +31,8 @@ if is_tf_available():
 class TFCTRLModelTester(object):
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_distilbert.py

@@ -39,7 +39,8 @@ if is_tf_available():
 class TFDistilBertModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_electra.py

@@ -39,7 +39,8 @@ if is_tf_available():
 class TFElectraModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_flaubert.py

@@ -40,7 +40,8 @@ if is_tf_available():
 class TFFlaubertModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13

@@ -337,7 +338,8 @@ class TFFlaubertModelIntegrationTest(unittest.TestCase):
         model = TFFlaubertModel.from_pretrained("jplu/tf-flaubert-small-cased")
         input_ids = tf.convert_to_tensor(
-            [[0, 158, 735, 2592, 1424, 6727, 82, 1]], dtype=tf.int32,
+            [[0, 158, 735, 2592, 1424, 6727, 82, 1]],
+            dtype=tf.int32,
         )  # "J'aime flaubert !"
         output = model(input_ids)[0]
tests/test_modeling_tf_gpt2.py

@@ -37,7 +37,8 @@ if is_tf_available():
 class TFGPT2ModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_longformer.py

@@ -45,7 +45,8 @@ if is_tf_available():
 class TFLongformerModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13

@@ -228,7 +229,8 @@ class TFLongformerModelTester:
         # global attention mask has to be partly defined
         # to trace all weights
         global_attention_mask = tf.concat(
-            [tf.zeros_like(input_ids)[:, :-1], tf.ones_like(input_ids)[:, -1:]], axis=-1,
+            [tf.zeros_like(input_ids)[:, :-1], tf.ones_like(input_ids)[:, -1:]],
+            axis=-1,
         )

         inputs_dict = {

@@ -267,7 +269,13 @@ class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase):
     test_torchscript = False
     all_model_classes = (
-        (TFLongformerModel, TFLongformerForMaskedLM, TFLongformerForQuestionAnswering,) if is_tf_available() else ()
+        (
+            TFLongformerModel,
+            TFLongformerForMaskedLM,
+            TFLongformerForQuestionAnswering,
+        )
+        if is_tf_available()
+        else ()
     )

     def setUp(self):
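The all_model_classes hunk above combines two behaviours: the trailing comma explodes the inner tuple, and once the expression is multi-line black places the if and else parts of the surrounding conditional expression on lines of their own. A sketch under those assumptions, with hypothetical classes:

    class ModelA: ...
    class ModelB: ...

    def is_available():  # stand-in for is_tf_available()
        return True

    all_model_classes = (
        (
            ModelA,
            ModelB,
        )
        if is_available()
        else ()
    )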
tests/test_modeling_tf_openai.py

@@ -36,7 +36,8 @@ if is_tf_available():
 class TFOpenAIGPTModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_roberta.py

@@ -40,7 +40,8 @@ if is_tf_available():
 class TFRobertaModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_t5.py

@@ -32,7 +32,8 @@ if is_tf_available():
 class TFT5ModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13

@@ -181,7 +182,10 @@ class TFT5ModelTester:
         # append to next input_ids and attn_mask
         next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
-        attn_mask = tf.concat([attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)], axis=1,)
+        attn_mask = tf.concat(
+            [attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)],
+            axis=1,
+        )

         # get two different outputs
         output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
tests/test_modeling_tf_transfo_xl.py

@@ -32,7 +32,8 @@ if is_tf_available():
 class TFTransfoXLModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_xlm.py

@@ -40,7 +40,8 @@ if is_tf_available():
 class TFXLMModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13
tests/test_modeling_tf_xlnet.py

@@ -40,7 +40,8 @@ if is_tf_available():
 class TFXLNetModelTester:
     def __init__(
-        self, parent,
+        self,
+        parent,
     ):
         self.parent = parent
         self.batch_size = 13