Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
a75c64d8
Commit
a75c64d8
authored
Aug 26, 2020
by
Lysandre
Browse files
Black 20 release
parent
e78c1103
Changes
191
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
228 additions
and
67 deletions
+228
-67
tests/test_modeling_longformer.py
tests/test_modeling_longformer.py
+2
-1
tests/test_modeling_mobilebert.py
tests/test_modeling_mobilebert.py
+9
-2
tests/test_modeling_openai.py
tests/test_modeling_openai.py
+2
-1
tests/test_modeling_reformer.py
tests/test_modeling_reformer.py
+70
-22
tests/test_modeling_roberta.py
tests/test_modeling_roberta.py
+4
-3
tests/test_modeling_t5.py
tests/test_modeling_t5.py
+73
-12
tests/test_modeling_tf_camembert.py
tests/test_modeling_tf_camembert.py
+4
-2
tests/test_modeling_tf_common.py
tests/test_modeling_tf_common.py
+24
-7
tests/test_modeling_tf_ctrl.py
tests/test_modeling_tf_ctrl.py
+2
-1
tests/test_modeling_tf_distilbert.py
tests/test_modeling_tf_distilbert.py
+2
-1
tests/test_modeling_tf_electra.py
tests/test_modeling_tf_electra.py
+2
-1
tests/test_modeling_tf_flaubert.py
tests/test_modeling_tf_flaubert.py
+4
-2
tests/test_modeling_tf_gpt2.py
tests/test_modeling_tf_gpt2.py
+2
-1
tests/test_modeling_tf_longformer.py
tests/test_modeling_tf_longformer.py
+12
-4
tests/test_modeling_tf_openai.py
tests/test_modeling_tf_openai.py
+2
-1
tests/test_modeling_tf_roberta.py
tests/test_modeling_tf_roberta.py
+2
-1
tests/test_modeling_tf_t5.py
tests/test_modeling_tf_t5.py
+6
-2
tests/test_modeling_tf_transfo_xl.py
tests/test_modeling_tf_transfo_xl.py
+2
-1
tests/test_modeling_tf_xlm.py
tests/test_modeling_tf_xlm.py
+2
-1
tests/test_modeling_tf_xlnet.py
tests/test_modeling_tf_xlnet.py
+2
-1
No files found.
tests/test_modeling_longformer.py
View file @
a75c64d8
...
@@ -40,7 +40,8 @@ if is_torch_available():
...
@@ -40,7 +40,8 @@ if is_torch_available():
class
LongformerModelTester
:
class
LongformerModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_mobilebert.py
View file @
a75c64d8
...
@@ -217,7 +217,10 @@ class MobileBertModelTester:
...
@@ -217,7 +217,10 @@ class MobileBertModelTester:
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
result
=
model
(
result
=
model
(
input_ids
,
attention_mask
=
input_mask
,
token_type_ids
=
token_type_ids
,
next_sentence_label
=
sequence_labels
,
input_ids
,
attention_mask
=
input_mask
,
token_type_ids
=
token_type_ids
,
next_sentence_label
=
sequence_labels
,
)
)
self
.
parent
.
assertEqual
(
result
.
logits
.
shape
,
(
self
.
batch_size
,
2
))
self
.
parent
.
assertEqual
(
result
.
logits
.
shape
,
(
self
.
batch_size
,
2
))
...
@@ -397,7 +400,11 @@ class MobileBertModelTest(ModelTesterMixin, unittest.TestCase):
...
@@ -397,7 +400,11 @@ class MobileBertModelTest(ModelTesterMixin, unittest.TestCase):
def
_long_tensor
(
tok_lst
):
def
_long_tensor
(
tok_lst
):
return
torch
.
tensor
(
tok_lst
,
dtype
=
torch
.
long
,
device
=
torch_device
,)
return
torch
.
tensor
(
tok_lst
,
dtype
=
torch
.
long
,
device
=
torch_device
,
)
TOLERANCE
=
1e-3
TOLERANCE
=
1e-3
...
...
tests/test_modeling_openai.py
View file @
a75c64d8
...
@@ -37,7 +37,8 @@ if is_torch_available():
...
@@ -37,7 +37,8 @@ if is_torch_available():
class
OpenAIGPTModelTester
:
class
OpenAIGPTModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_reformer.py
View file @
a75c64d8
...
@@ -240,14 +240,19 @@ class ReformerModelTester:
...
@@ -240,14 +240,19 @@ class ReformerModelTester:
half_input_ids
=
input_ids
[:,
:
half_seq_len
]
half_input_ids
=
input_ids
[:,
:
half_seq_len
]
# normal padded
# normal padded
attn_mask
=
torch
.
cat
([
torch
.
ones_like
(
half_input_ids
),
torch
.
zeros_like
(
half_input_ids
)],
dim
=-
1
,)
attn_mask
=
torch
.
cat
(
[
torch
.
ones_like
(
half_input_ids
),
torch
.
zeros_like
(
half_input_ids
)],
dim
=-
1
,
)
input_ids_padded
=
torch
.
cat
(
input_ids_padded
=
torch
.
cat
(
[
half_input_ids
,
ids_tensor
((
self
.
batch_size
,
half_seq_len
),
self
.
vocab_size
)],
dim
=-
1
,
[
half_input_ids
,
ids_tensor
((
self
.
batch_size
,
half_seq_len
),
self
.
vocab_size
)],
dim
=-
1
,
)
)
# shifted padded
# shifted padded
input_ids_roll
=
torch
.
cat
(
input_ids_roll
=
torch
.
cat
(
[
half_input_ids
,
ids_tensor
((
self
.
batch_size
,
half_seq_len
),
self
.
vocab_size
)],
dim
=-
1
,
[
half_input_ids
,
ids_tensor
((
self
.
batch_size
,
half_seq_len
),
self
.
vocab_size
)],
dim
=-
1
,
)
)
input_ids_roll
=
torch
.
roll
(
input_ids_roll
,
roll
,
dims
=-
1
)
input_ids_roll
=
torch
.
roll
(
input_ids_roll
,
roll
,
dims
=-
1
)
attn_mask_roll
=
torch
.
roll
(
attn_mask
,
roll
,
dims
=-
1
)
attn_mask_roll
=
torch
.
roll
(
attn_mask
,
roll
,
dims
=-
1
)
...
@@ -283,13 +288,21 @@ class ReformerModelTester:
...
@@ -283,13 +288,21 @@ class ReformerModelTester:
torch
.
manual_seed
(
layer
.
attention_seed
)
torch
.
manual_seed
(
layer
.
attention_seed
)
attn_outputs
=
layer
.
attention
(
hidden_states
,
attention_mask
=
input_mask
)
attn_outputs
=
layer
.
attention
(
hidden_states
,
attention_mask
=
input_mask
)
self
.
parent
.
assertTrue
(
self
.
parent
.
assertTrue
(
torch
.
allclose
(
prev_attn_output
+
attn_outputs
.
hidden_states
,
next_attn_output
,
atol
=
1e-3
,)
torch
.
allclose
(
prev_attn_output
+
attn_outputs
.
hidden_states
,
next_attn_output
,
atol
=
1e-3
,
)
)
)
torch
.
manual_seed
(
layer
.
feed_forward_seed
)
torch
.
manual_seed
(
layer
.
feed_forward_seed
)
feed_forward_hidden_states
=
layer
.
feed_forward
(
next_attn_output
)
feed_forward_hidden_states
=
layer
.
feed_forward
(
next_attn_output
)
self
.
parent
.
assertTrue
(
self
.
parent
.
assertTrue
(
torch
.
allclose
(
next_hidden_states
,
hidden_states
+
feed_forward_hidden_states
,
atol
=
1e-3
,)
torch
.
allclose
(
next_hidden_states
,
hidden_states
+
feed_forward_hidden_states
,
atol
=
1e-3
,
)
)
)
def
create_and_check_reformer_feed_backward_chunking
(
self
,
config
,
input_ids
,
input_mask
,
choice_labels
):
def
create_and_check_reformer_feed_backward_chunking
(
self
,
config
,
input_ids
,
input_mask
,
choice_labels
):
...
@@ -416,7 +429,10 @@ class ReformerModelTester:
...
@@ -416,7 +429,10 @@ class ReformerModelTester:
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
model
.
eval
()
model
.
eval
()
result
=
model
(
result
=
model
(
input_ids
,
attention_mask
=
input_mask
,
start_positions
=
choice_labels
,
end_positions
=
choice_labels
,
input_ids
,
attention_mask
=
input_mask
,
start_positions
=
choice_labels
,
end_positions
=
choice_labels
,
)
)
self
.
parent
.
assertEqual
(
result
.
start_logits
.
shape
,
(
self
.
batch_size
,
self
.
seq_length
))
self
.
parent
.
assertEqual
(
result
.
start_logits
.
shape
,
(
self
.
batch_size
,
self
.
seq_length
))
self
.
parent
.
assertEqual
(
result
.
end_logits
.
shape
,
(
self
.
batch_size
,
self
.
seq_length
))
self
.
parent
.
assertEqual
(
result
.
end_logits
.
shape
,
(
self
.
batch_size
,
self
.
seq_length
))
...
@@ -468,7 +484,7 @@ class ReformerModelTester:
...
@@ -468,7 +484,7 @@ class ReformerModelTester:
class
ReformerTesterMixin
:
class
ReformerTesterMixin
:
"""
"""
Reformer Local and Reformer LSH run essentially the same tests
Reformer Local and Reformer LSH run essentially the same tests
"""
"""
def
test_config
(
self
):
def
test_config
(
self
):
...
@@ -887,7 +903,9 @@ class ReformerIntegrationTests(unittest.TestCase):
...
@@ -887,7 +903,9 @@ class ReformerIntegrationTests(unittest.TestCase):
reformer_output
=
layer
(
prev_attn_output
=
hidden_states
.
clone
(),
hidden_states
=
hidden_states
)
reformer_output
=
layer
(
prev_attn_output
=
hidden_states
.
clone
(),
hidden_states
=
hidden_states
)
output_slice
=
reformer_output
.
hidden_states
[
0
,
0
,
:
5
]
output_slice
=
reformer_output
.
hidden_states
[
0
,
0
,
:
5
]
expected_output_slice
=
torch
.
tensor
(
expected_output_slice
=
torch
.
tensor
(
[
1.6879
,
-
1.3083
,
-
0.4708
,
1.3555
,
-
0.6292
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
1.6879
,
-
1.3083
,
-
0.4708
,
1.3555
,
-
0.6292
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
...
@@ -902,11 +920,15 @@ class ReformerIntegrationTests(unittest.TestCase):
...
@@ -902,11 +920,15 @@ class ReformerIntegrationTests(unittest.TestCase):
layer
=
ReformerLayer
(
ReformerConfig
(
**
config
)).
to
(
torch_device
)
layer
=
ReformerLayer
(
ReformerConfig
(
**
config
)).
to
(
torch_device
)
layer
.
eval
()
layer
.
eval
()
reformer_output
=
layer
(
reformer_output
=
layer
(
prev_attn_output
=
hidden_states
.
clone
(),
hidden_states
=
hidden_states
,
attention_mask
=
attn_mask
,
prev_attn_output
=
hidden_states
.
clone
(),
hidden_states
=
hidden_states
,
attention_mask
=
attn_mask
,
)
)
output_slice
=
reformer_output
.
hidden_states
[
0
,
0
,
:
5
]
output_slice
=
reformer_output
.
hidden_states
[
0
,
0
,
:
5
]
expected_output_slice
=
torch
.
tensor
(
expected_output_slice
=
torch
.
tensor
(
[
1.6439
,
-
1.2306
,
-
0.5108
,
1.3006
,
-
0.6537
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
1.6439
,
-
1.2306
,
-
0.5108
,
1.3006
,
-
0.6537
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
...
@@ -922,7 +944,9 @@ class ReformerIntegrationTests(unittest.TestCase):
...
@@ -922,7 +944,9 @@ class ReformerIntegrationTests(unittest.TestCase):
reformer_output
=
layer
(
prev_attn_output
=
hidden_states
,
hidden_states
=
hidden_states
)
reformer_output
=
layer
(
prev_attn_output
=
hidden_states
,
hidden_states
=
hidden_states
)
output_slice
=
reformer_output
.
hidden_states
[
0
,
0
,
:
5
]
output_slice
=
reformer_output
.
hidden_states
[
0
,
0
,
:
5
]
expected_output_slice
=
torch
.
tensor
(
expected_output_slice
=
torch
.
tensor
(
[
1.4212
,
-
2.0576
,
-
0.9688
,
1.4599
,
-
0.1344
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
1.4212
,
-
2.0576
,
-
0.9688
,
1.4599
,
-
0.1344
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
...
@@ -935,10 +959,16 @@ class ReformerIntegrationTests(unittest.TestCase):
...
@@ -935,10 +959,16 @@ class ReformerIntegrationTests(unittest.TestCase):
torch
.
manual_seed
(
0
)
torch
.
manual_seed
(
0
)
layer
=
ReformerLayer
(
ReformerConfig
(
**
config
)).
to
(
torch_device
)
layer
=
ReformerLayer
(
ReformerConfig
(
**
config
)).
to
(
torch_device
)
layer
.
eval
()
layer
.
eval
()
reformer_output
=
layer
(
prev_attn_output
=
hidden_states
,
hidden_states
=
hidden_states
,
attention_mask
=
attn_mask
,)
reformer_output
=
layer
(
prev_attn_output
=
hidden_states
,
hidden_states
=
hidden_states
,
attention_mask
=
attn_mask
,
)
output_slice
=
reformer_output
.
hidden_states
[
0
,
0
,
:
5
]
output_slice
=
reformer_output
.
hidden_states
[
0
,
0
,
:
5
]
expected_output_slice
=
torch
.
tensor
(
expected_output_slice
=
torch
.
tensor
(
[
1.4750
,
-
2.0235
,
-
0.9743
,
1.4463
,
-
0.1269
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
1.4750
,
-
2.0235
,
-
0.9743
,
1.4463
,
-
0.1269
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
...
@@ -953,7 +983,9 @@ class ReformerIntegrationTests(unittest.TestCase):
...
@@ -953,7 +983,9 @@ class ReformerIntegrationTests(unittest.TestCase):
hidden_states
=
model
(
input_ids
=
input_ids
,
attention_mask
=
attn_mask
)[
0
]
hidden_states
=
model
(
input_ids
=
input_ids
,
attention_mask
=
attn_mask
)[
0
]
output_slice
=
hidden_states
[
0
,
0
,
:
5
]
output_slice
=
hidden_states
[
0
,
0
,
:
5
]
expected_output_slice
=
torch
.
tensor
(
expected_output_slice
=
torch
.
tensor
(
[
-
0.9896
,
-
0.9396
,
-
1.0831
,
-
0.0597
,
0.2456
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
-
0.9896
,
-
0.9396
,
-
1.0831
,
-
0.0597
,
0.2456
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
...
@@ -967,7 +999,9 @@ class ReformerIntegrationTests(unittest.TestCase):
...
@@ -967,7 +999,9 @@ class ReformerIntegrationTests(unittest.TestCase):
hidden_states
=
model
(
input_ids
=
input_ids
,
attention_mask
=
attn_mask
)[
0
]
hidden_states
=
model
(
input_ids
=
input_ids
,
attention_mask
=
attn_mask
)[
0
]
output_slice
=
hidden_states
[
0
,
0
,
:
5
]
output_slice
=
hidden_states
[
0
,
0
,
:
5
]
expected_output_slice
=
torch
.
tensor
(
expected_output_slice
=
torch
.
tensor
(
[
-
1.6791
,
0.7171
,
0.1594
,
0.4063
,
1.2584
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
-
1.6791
,
0.7171
,
0.1594
,
0.4063
,
1.2584
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
...
@@ -983,7 +1017,9 @@ class ReformerIntegrationTests(unittest.TestCase):
...
@@ -983,7 +1017,9 @@ class ReformerIntegrationTests(unittest.TestCase):
hidden_states
=
model
(
input_ids
=
input_ids
,
attention_mask
=
attn_mask
)[
0
]
hidden_states
=
model
(
input_ids
=
input_ids
,
attention_mask
=
attn_mask
)[
0
]
output_slice
=
hidden_states
[
1
,
-
1
,
:
5
]
output_slice
=
hidden_states
[
1
,
-
1
,
:
5
]
expected_output_slice
=
torch
.
tensor
(
expected_output_slice
=
torch
.
tensor
(
[
0.0256
,
-
0.0121
,
0.0636
,
0.0024
,
-
0.0393
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
0.0256
,
-
0.0121
,
0.0636
,
0.0024
,
-
0.0393
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
output_slice
,
expected_output_slice
,
atol
=
1e-3
))
...
@@ -1005,15 +1041,21 @@ class ReformerIntegrationTests(unittest.TestCase):
...
@@ -1005,15 +1041,21 @@ class ReformerIntegrationTests(unittest.TestCase):
# check last grads to cover all proable errors
# check last grads to cover all proable errors
grad_slice_word
=
model
.
reformer
.
embeddings
.
word_embeddings
.
weight
.
grad
[
0
,
:
5
]
grad_slice_word
=
model
.
reformer
.
embeddings
.
word_embeddings
.
weight
.
grad
[
0
,
:
5
]
expected_grad_slice_word
=
torch
.
tensor
(
expected_grad_slice_word
=
torch
.
tensor
(
[
-
0.0005
,
0.0001
,
0.0002
,
0.0003
,
0.0006
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
-
0.0005
,
0.0001
,
0.0002
,
0.0003
,
0.0006
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
grad_slice_position_factor_1
=
model
.
reformer
.
embeddings
.
position_embeddings
.
weights
[
0
][
1
,
0
,
-
5
:]
grad_slice_position_factor_1
=
model
.
reformer
.
embeddings
.
position_embeddings
.
weights
[
0
][
1
,
0
,
-
5
:]
expected_grad_slice_pos_fac_1
=
torch
.
tensor
(
expected_grad_slice_pos_fac_1
=
torch
.
tensor
(
[
0.0037
,
-
1.3793
,
-
1.0231
,
-
1.5230
,
-
2.5306
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
0.0037
,
-
1.3793
,
-
1.0231
,
-
1.5230
,
-
2.5306
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
grad_slice_position_factor_2
=
model
.
reformer
.
embeddings
.
position_embeddings
.
weights
[
1
][
0
,
1
,
:
5
]
grad_slice_position_factor_2
=
model
.
reformer
.
embeddings
.
position_embeddings
.
weights
[
1
][
0
,
1
,
:
5
]
expected_grad_slice_pos_fac_2
=
torch
.
tensor
(
expected_grad_slice_pos_fac_2
=
torch
.
tensor
(
[
-
1.3165
,
0.5168
,
0.7785
,
1.0811
,
-
0.9830
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
-
1.3165
,
0.5168
,
0.7785
,
1.0811
,
-
0.9830
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
self
.
assertTrue
(
torch
.
allclose
(
grad_slice_word
,
expected_grad_slice_word
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
grad_slice_word
,
expected_grad_slice_word
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
grad_slice_position_factor_1
,
expected_grad_slice_pos_fac_1
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
grad_slice_position_factor_1
,
expected_grad_slice_pos_fac_1
,
atol
=
1e-3
))
...
@@ -1038,15 +1080,21 @@ class ReformerIntegrationTests(unittest.TestCase):
...
@@ -1038,15 +1080,21 @@ class ReformerIntegrationTests(unittest.TestCase):
# check last grads to cover all proable errors
# check last grads to cover all proable errors
grad_slice_word
=
model
.
reformer
.
embeddings
.
word_embeddings
.
weight
.
grad
[
0
,
:
5
]
grad_slice_word
=
model
.
reformer
.
embeddings
.
word_embeddings
.
weight
.
grad
[
0
,
:
5
]
expected_grad_slice_word
=
torch
.
tensor
(
expected_grad_slice_word
=
torch
.
tensor
(
[
2.6357e-05
,
4.3358e-04
,
-
8.4985e-04
,
1.0094e-04
,
3.8954e-04
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
2.6357e-05
,
4.3358e-04
,
-
8.4985e-04
,
1.0094e-04
,
3.8954e-04
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
grad_slice_position_factor_1
=
model
.
reformer
.
embeddings
.
position_embeddings
.
weights
[
0
][
1
,
0
,
-
5
:]
grad_slice_position_factor_1
=
model
.
reformer
.
embeddings
.
position_embeddings
.
weights
[
0
][
1
,
0
,
-
5
:]
expected_grad_slice_pos_fac_1
=
torch
.
tensor
(
expected_grad_slice_pos_fac_1
=
torch
.
tensor
(
[
-
0.0984
,
0.6283
,
0.4282
,
1.2960
,
0.6897
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
-
0.0984
,
0.6283
,
0.4282
,
1.2960
,
0.6897
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
grad_slice_position_factor_2
=
model
.
reformer
.
embeddings
.
position_embeddings
.
weights
[
1
][
0
,
1
,
:
5
]
grad_slice_position_factor_2
=
model
.
reformer
.
embeddings
.
position_embeddings
.
weights
[
1
][
0
,
1
,
:
5
]
expected_grad_slice_pos_fac_2
=
torch
.
tensor
(
expected_grad_slice_pos_fac_2
=
torch
.
tensor
(
[
0.4626
,
-
0.0231
,
-
0.0172
,
0.1081
,
0.3805
],
dtype
=
torch
.
float
,
device
=
torch_device
,
[
0.4626
,
-
0.0231
,
-
0.0172
,
0.1081
,
0.3805
],
dtype
=
torch
.
float
,
device
=
torch_device
,
)
)
self
.
assertTrue
(
torch
.
allclose
(
grad_slice_word
,
expected_grad_slice_word
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
grad_slice_word
,
expected_grad_slice_word
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
grad_slice_position_factor_1
,
expected_grad_slice_pos_fac_1
,
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
grad_slice_position_factor_1
,
expected_grad_slice_pos_fac_1
,
atol
=
1e-3
))
...
...
tests/test_modeling_roberta.py
View file @
a75c64d8
...
@@ -45,7 +45,8 @@ if is_torch_available():
...
@@ -45,7 +45,8 @@ if is_torch_available():
class
RobertaModelTester
:
class
RobertaModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
@@ -352,7 +353,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
...
@@ -352,7 +353,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
self
.
assertIsNotNone
(
model
)
self
.
assertIsNotNone
(
model
)
def
test_create_position_ids_respects_padding_index
(
self
):
def
test_create_position_ids_respects_padding_index
(
self
):
"""
Ensure that the default position ids only assign a sequential . This is a regression
"""Ensure that the default position ids only assign a sequential . This is a regression
test for https://github.com/huggingface/transformers/issues/1761
test for https://github.com/huggingface/transformers/issues/1761
The position ids should be masked with the embedding object's padding index. Therefore, the
The position ids should be masked with the embedding object's padding index. Therefore, the
...
@@ -371,7 +372,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
...
@@ -371,7 +372,7 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
self
.
assertTrue
(
torch
.
all
(
torch
.
eq
(
position_ids
,
expected_positions
)))
self
.
assertTrue
(
torch
.
all
(
torch
.
eq
(
position_ids
,
expected_positions
)))
def
test_create_position_ids_from_inputs_embeds
(
self
):
def
test_create_position_ids_from_inputs_embeds
(
self
):
"""
Ensure that the default position ids only assign a sequential . This is a regression
"""Ensure that the default position ids only assign a sequential . This is a regression
test for https://github.com/huggingface/transformers/issues/1761
test for https://github.com/huggingface/transformers/issues/1761
The position ids should be masked with the embedding object's padding index. Therefore, the
The position ids should be masked with the embedding object's padding index. Therefore, the
...
...
tests/test_modeling_t5.py
View file @
a75c64d8
...
@@ -101,7 +101,13 @@ class T5ModelTester:
...
@@ -101,7 +101,13 @@ class T5ModelTester:
)
)
def
check_prepare_lm_labels_via_shift_left
(
def
check_prepare_lm_labels_via_shift_left
(
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
):
):
model
=
T5Model
(
config
=
config
)
model
=
T5Model
(
config
=
config
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
...
@@ -134,7 +140,13 @@ class T5ModelTester:
...
@@ -134,7 +140,13 @@ class T5ModelTester:
self
.
parent
.
assertListEqual
(
decoder_input_ids_slice
[
1
:].
tolist
(),
lm_labels_slice
[:
-
1
].
tolist
())
self
.
parent
.
assertListEqual
(
decoder_input_ids_slice
[
1
:].
tolist
(),
lm_labels_slice
[:
-
1
].
tolist
())
def
create_and_check_model
(
def
create_and_check_model
(
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
):
):
model
=
T5Model
(
config
=
config
)
model
=
T5Model
(
config
=
config
)
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
...
@@ -160,7 +172,13 @@ class T5ModelTester:
...
@@ -160,7 +172,13 @@ class T5ModelTester:
self
.
parent
.
assertEqual
(
len
(
decoder_past
[
1
][
0
]),
4
)
self
.
parent
.
assertEqual
(
len
(
decoder_past
[
1
][
0
]),
4
)
def
create_and_check_with_lm_head
(
def
create_and_check_with_lm_head
(
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
):
):
model
=
T5ForConditionalGeneration
(
config
=
config
).
to
(
torch_device
).
eval
()
model
=
T5ForConditionalGeneration
(
config
=
config
).
to
(
torch_device
).
eval
()
outputs
=
model
(
outputs
=
model
(
...
@@ -174,7 +192,13 @@ class T5ModelTester:
...
@@ -174,7 +192,13 @@ class T5ModelTester:
self
.
parent
.
assertEqual
(
outputs
[
"loss"
].
size
(),
())
self
.
parent
.
assertEqual
(
outputs
[
"loss"
].
size
(),
())
def
create_and_check_decoder_model_past
(
def
create_and_check_decoder_model_past
(
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
):
):
model
=
T5Model
(
config
=
config
).
get_decoder
().
to
(
torch_device
).
eval
()
model
=
T5Model
(
config
=
config
).
get_decoder
().
to
(
torch_device
).
eval
()
# first forward pass
# first forward pass
...
@@ -205,7 +229,13 @@ class T5ModelTester:
...
@@ -205,7 +229,13 @@ class T5ModelTester:
self
.
parent
.
assertTrue
(
torch
.
allclose
(
output_from_past_slice
,
output_from_no_past_slice
,
atol
=
1e-3
))
self
.
parent
.
assertTrue
(
torch
.
allclose
(
output_from_past_slice
,
output_from_no_past_slice
,
atol
=
1e-3
))
def
create_and_check_decoder_model_attention_mask_past
(
def
create_and_check_decoder_model_attention_mask_past
(
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
):
):
model
=
T5Model
(
config
=
config
).
get_decoder
()
model
=
T5Model
(
config
=
config
).
get_decoder
()
model
.
to
(
torch_device
)
model
.
to
(
torch_device
)
...
@@ -231,7 +261,8 @@ class T5ModelTester:
...
@@ -231,7 +261,8 @@ class T5ModelTester:
# append to next input_ids and attn_mask
# append to next input_ids and attn_mask
next_input_ids
=
torch
.
cat
([
input_ids
,
next_tokens
],
dim
=-
1
)
next_input_ids
=
torch
.
cat
([
input_ids
,
next_tokens
],
dim
=-
1
)
attn_mask
=
torch
.
cat
(
attn_mask
=
torch
.
cat
(
[
attn_mask
,
torch
.
ones
((
attn_mask
.
shape
[
0
],
1
),
dtype
=
torch
.
long
,
device
=
torch_device
)],
dim
=
1
,
[
attn_mask
,
torch
.
ones
((
attn_mask
.
shape
[
0
],
1
),
dtype
=
torch
.
long
,
device
=
torch_device
)],
dim
=
1
,
)
)
# get two different outputs
# get two different outputs
...
@@ -249,7 +280,13 @@ class T5ModelTester:
...
@@ -249,7 +280,13 @@ class T5ModelTester:
self
.
parent
.
assertTrue
(
torch
.
allclose
(
output_from_past_slice
,
output_from_no_past_slice
,
atol
=
1e-3
))
self
.
parent
.
assertTrue
(
torch
.
allclose
(
output_from_past_slice
,
output_from_no_past_slice
,
atol
=
1e-3
))
def
create_and_check_generate_with_past_key_value_states
(
def
create_and_check_generate_with_past_key_value_states
(
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
):
):
model
=
T5ForConditionalGeneration
(
config
=
config
).
to
(
torch_device
).
eval
()
model
=
T5ForConditionalGeneration
(
config
=
config
).
to
(
torch_device
).
eval
()
torch
.
manual_seed
(
0
)
torch
.
manual_seed
(
0
)
...
@@ -261,14 +298,26 @@ class T5ModelTester:
...
@@ -261,14 +298,26 @@ class T5ModelTester:
self
.
parent
.
assertTrue
(
torch
.
all
(
output_with_past_cache
==
output_without_past_cache
))
self
.
parent
.
assertTrue
(
torch
.
all
(
output_with_past_cache
==
output_without_past_cache
))
def
create_and_check_model_fp16_forward
(
def
create_and_check_model_fp16_forward
(
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
):
):
model
=
T5Model
(
config
=
config
).
to
(
torch_device
).
half
().
eval
()
model
=
T5Model
(
config
=
config
).
to
(
torch_device
).
half
().
eval
()
output
=
model
(
input_ids
,
decoder_input_ids
=
input_ids
,
attention_mask
=
attention_mask
)[
"last_hidden_state"
]
output
=
model
(
input_ids
,
decoder_input_ids
=
input_ids
,
attention_mask
=
attention_mask
)[
"last_hidden_state"
]
self
.
parent
.
assertFalse
(
torch
.
isnan
(
output
).
any
().
item
())
self
.
parent
.
assertFalse
(
torch
.
isnan
(
output
).
any
().
item
())
def
create_and_check_encoder_decoder_shared_weights
(
def
create_and_check_encoder_decoder_shared_weights
(
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
self
,
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
):
):
for
model_class
in
[
T5Model
,
T5ForConditionalGeneration
]:
for
model_class
in
[
T5Model
,
T5ForConditionalGeneration
]:
torch
.
manual_seed
(
0
)
torch
.
manual_seed
(
0
)
...
@@ -339,7 +388,14 @@ class T5ModelTester:
...
@@ -339,7 +388,14 @@ class T5ModelTester:
def
prepare_config_and_inputs_for_common
(
self
):
def
prepare_config_and_inputs_for_common
(
self
):
config_and_inputs
=
self
.
prepare_config_and_inputs
()
config_and_inputs
=
self
.
prepare_config_and_inputs
()
(
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,)
=
config_and_inputs
(
config
,
input_ids
,
decoder_input_ids
,
attention_mask
,
decoder_attention_mask
,
lm_labels
,
)
=
config_and_inputs
inputs_dict
=
{
inputs_dict
=
{
"input_ids"
:
input_ids
,
"input_ids"
:
input_ids
,
...
@@ -412,7 +468,11 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
...
@@ -412,7 +468,11 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
model
=
T5Model
(
config_and_inputs
[
0
]).
to
(
torch_device
)
model
=
T5Model
(
config_and_inputs
[
0
]).
to
(
torch_device
)
with
tempfile
.
TemporaryDirectory
()
as
tmpdirname
:
with
tempfile
.
TemporaryDirectory
()
as
tmpdirname
:
torch
.
onnx
.
export
(
torch
.
onnx
.
export
(
model
,
config_and_inputs
[
1
],
f
"
{
tmpdirname
}
/t5_test.onnx"
,
export_params
=
True
,
opset_version
=
9
,
model
,
config_and_inputs
[
1
],
f
"
{
tmpdirname
}
/t5_test.onnx"
,
export_params
=
True
,
opset_version
=
9
,
)
)
...
@@ -469,7 +529,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
...
@@ -469,7 +529,8 @@ class T5ModelIntegrationTests(unittest.TestCase):
)
)
decoded
=
tok
.
batch_decode
(
hypotheses_batch
,
skip_special_tokens
=
True
,
clean_up_tokenization_spaces
=
False
)
decoded
=
tok
.
batch_decode
(
hypotheses_batch
,
skip_special_tokens
=
True
,
clean_up_tokenization_spaces
=
False
)
self
.
assertListEqual
(
self
.
assertListEqual
(
expected_summaries
,
decoded
,
expected_summaries
,
decoded
,
)
)
@
slow
@
slow
...
...
tests/test_modeling_tf_camembert.py
View file @
a75c64d8
...
@@ -33,7 +33,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
...
@@ -33,7 +33,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
model
=
TFCamembertModel
.
from_pretrained
(
"jplu/tf-camembert-base"
)
model
=
TFCamembertModel
.
from_pretrained
(
"jplu/tf-camembert-base"
)
input_ids
=
tf
.
convert_to_tensor
(
input_ids
=
tf
.
convert_to_tensor
(
[[
5
,
121
,
11
,
660
,
16
,
730
,
25543
,
110
,
83
,
6
]],
dtype
=
tf
.
int32
,
[[
5
,
121
,
11
,
660
,
16
,
730
,
25543
,
110
,
83
,
6
]],
dtype
=
tf
.
int32
,
)
# J'aime le camembert !"
)
# J'aime le camembert !"
output
=
model
(
input_ids
)[
"last_hidden_state"
]
output
=
model
(
input_ids
)[
"last_hidden_state"
]
...
@@ -41,7 +42,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
...
@@ -41,7 +42,8 @@ class TFCamembertModelIntegrationTest(unittest.TestCase):
self
.
assertEqual
(
output
.
shape
,
expected_shape
)
self
.
assertEqual
(
output
.
shape
,
expected_shape
)
# compare the actual values for a slice.
# compare the actual values for a slice.
expected_slice
=
tf
.
convert_to_tensor
(
expected_slice
=
tf
.
convert_to_tensor
(
[[[
-
0.0254
,
0.0235
,
0.1027
],
[
0.0606
,
-
0.1811
,
-
0.0418
],
[
-
0.1561
,
-
0.1127
,
0.2687
]]],
dtype
=
tf
.
float32
,
[[[
-
0.0254
,
0.0235
,
0.1027
],
[
0.0606
,
-
0.1811
,
-
0.0418
],
[
-
0.1561
,
-
0.1127
,
0.2687
]]],
dtype
=
tf
.
float32
,
)
)
# camembert = torch.hub.load('pytorch/fairseq', 'camembert.v0')
# camembert = torch.hub.load('pytorch/fairseq', 'camembert.v0')
# camembert.eval()
# camembert.eval()
...
...
tests/test_modeling_tf_common.py
View file @
a75c64d8
...
@@ -155,7 +155,8 @@ class TFModelTesterMixin:
...
@@ -155,7 +155,8 @@ class TFModelTesterMixin:
self
.
assertEqual
(
len
(
outputs
),
num_out
)
self
.
assertEqual
(
len
(
outputs
),
num_out
)
self
.
assertEqual
(
len
(
hidden_states
),
self
.
model_tester
.
num_hidden_layers
+
1
)
self
.
assertEqual
(
len
(
hidden_states
),
self
.
model_tester
.
num_hidden_layers
+
1
)
self
.
assertListEqual
(
self
.
assertListEqual
(
list
(
hidden_states
[
0
].
shape
[
-
2
:]),
[
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
hidden_size
],
list
(
hidden_states
[
0
].
shape
[
-
2
:]),
[
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
hidden_size
],
)
)
@
slow
@
slow
...
@@ -486,7 +487,8 @@ class TFModelTesterMixin:
...
@@ -486,7 +487,8 @@ class TFModelTesterMixin:
hidden_states
=
[
t
.
numpy
()
for
t
in
outputs
[
-
1
]]
hidden_states
=
[
t
.
numpy
()
for
t
in
outputs
[
-
1
]]
self
.
assertEqual
(
len
(
hidden_states
),
self
.
model_tester
.
num_hidden_layers
+
1
)
self
.
assertEqual
(
len
(
hidden_states
),
self
.
model_tester
.
num_hidden_layers
+
1
)
self
.
assertListEqual
(
self
.
assertListEqual
(
list
(
hidden_states
[
0
].
shape
[
-
2
:]),
[
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
hidden_size
],
list
(
hidden_states
[
0
].
shape
[
-
2
:]),
[
self
.
model_tester
.
seq_length
,
self
.
model_tester
.
hidden_size
],
)
)
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
...
@@ -591,9 +593,15 @@ class TFModelTesterMixin:
...
@@ -591,9 +593,15 @@ class TFModelTesterMixin:
x
=
wte
([
input_ids
,
None
,
None
,
None
],
mode
=
"embedding"
)
x
=
wte
([
input_ids
,
None
,
None
,
None
],
mode
=
"embedding"
)
except
Exception
:
except
Exception
:
if
hasattr
(
self
.
model_tester
,
"embedding_size"
):
if
hasattr
(
self
.
model_tester
,
"embedding_size"
):
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
embedding_size
],
dtype
=
tf
.
dtypes
.
float32
,)
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
embedding_size
],
dtype
=
tf
.
dtypes
.
float32
,
)
else
:
else
:
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
hidden_size
],
dtype
=
tf
.
dtypes
.
float32
,)
x
=
tf
.
ones
(
input_ids
.
shape
+
[
self
.
model_tester
.
hidden_size
],
dtype
=
tf
.
dtypes
.
float32
,
)
return
x
return
x
def
test_inputs_embeds
(
self
):
def
test_inputs_embeds
(
self
):
...
@@ -700,7 +708,14 @@ class TFModelTesterMixin:
...
@@ -700,7 +708,14 @@ class TFModelTesterMixin:
model
.
generate
(
input_ids
,
do_sample
=
False
,
num_return_sequences
=
3
,
num_beams
=
2
)
model
.
generate
(
input_ids
,
do_sample
=
False
,
num_return_sequences
=
3
,
num_beams
=
2
)
# num_return_sequences > 1, sample
# num_return_sequences > 1, sample
self
.
_check_generated_ids
(
model
.
generate
(
input_ids
,
do_sample
=
True
,
num_beams
=
2
,
num_return_sequences
=
2
,))
self
.
_check_generated_ids
(
model
.
generate
(
input_ids
,
do_sample
=
True
,
num_beams
=
2
,
num_return_sequences
=
2
,
)
)
# num_return_sequences > 1, greedy
# num_return_sequences > 1, greedy
self
.
_check_generated_ids
(
model
.
generate
(
input_ids
,
do_sample
=
False
,
num_beams
=
2
,
num_return_sequences
=
2
))
self
.
_check_generated_ids
(
model
.
generate
(
input_ids
,
do_sample
=
False
,
num_beams
=
2
,
num_return_sequences
=
2
))
...
@@ -895,7 +910,8 @@ class UtilsFunctionsTest(unittest.TestCase):
...
@@ -895,7 +910,8 @@ class UtilsFunctionsTest(unittest.TestCase):
)
)
non_inf_expected_idx
=
tf
.
convert_to_tensor
(
non_inf_expected_idx
=
tf
.
convert_to_tensor
(
[[
0
,
0
],
[
0
,
9
],
[
0
,
10
],
[
0
,
25
],
[
0
,
26
],
[
1
,
13
],
[
1
,
17
],
[
1
,
18
],
[
1
,
20
],
[
1
,
27
]],
dtype
=
tf
.
int32
,
[[
0
,
0
],
[
0
,
9
],
[
0
,
10
],
[
0
,
25
],
[
0
,
26
],
[
1
,
13
],
[
1
,
17
],
[
1
,
18
],
[
1
,
20
],
[
1
,
27
]],
dtype
=
tf
.
int32
,
)
# expected non filtered idx as noted above
)
# expected non filtered idx as noted above
non_inf_expected_output
=
tf
.
convert_to_tensor
(
non_inf_expected_output
=
tf
.
convert_to_tensor
(
...
@@ -907,7 +923,8 @@ class UtilsFunctionsTest(unittest.TestCase):
...
@@ -907,7 +923,8 @@ class UtilsFunctionsTest(unittest.TestCase):
non_inf_output
=
output
[
output
!=
-
float
(
"inf"
)]
non_inf_output
=
output
[
output
!=
-
float
(
"inf"
)]
non_inf_idx
=
tf
.
cast
(
non_inf_idx
=
tf
.
cast
(
tf
.
where
(
tf
.
not_equal
(
output
,
tf
.
constant
(
-
float
(
"inf"
),
dtype
=
tf
.
float32
))),
dtype
=
tf
.
int32
,
tf
.
where
(
tf
.
not_equal
(
output
,
tf
.
constant
(
-
float
(
"inf"
),
dtype
=
tf
.
float32
))),
dtype
=
tf
.
int32
,
)
)
tf
.
debugging
.
assert_near
(
non_inf_output
,
non_inf_expected_output
,
rtol
=
1e-12
)
tf
.
debugging
.
assert_near
(
non_inf_output
,
non_inf_expected_output
,
rtol
=
1e-12
)
...
...
tests/test_modeling_tf_ctrl.py
View file @
a75c64d8
...
@@ -31,7 +31,8 @@ if is_tf_available():
...
@@ -31,7 +31,8 @@ if is_tf_available():
class
TFCTRLModelTester
(
object
):
class
TFCTRLModelTester
(
object
):
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_tf_distilbert.py
View file @
a75c64d8
...
@@ -39,7 +39,8 @@ if is_tf_available():
...
@@ -39,7 +39,8 @@ if is_tf_available():
class
TFDistilBertModelTester
:
class
TFDistilBertModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_tf_electra.py
View file @
a75c64d8
...
@@ -39,7 +39,8 @@ if is_tf_available():
...
@@ -39,7 +39,8 @@ if is_tf_available():
class
TFElectraModelTester
:
class
TFElectraModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_tf_flaubert.py
View file @
a75c64d8
...
@@ -40,7 +40,8 @@ if is_tf_available():
...
@@ -40,7 +40,8 @@ if is_tf_available():
class
TFFlaubertModelTester
:
class
TFFlaubertModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
@@ -337,7 +338,8 @@ class TFFlaubertModelIntegrationTest(unittest.TestCase):
...
@@ -337,7 +338,8 @@ class TFFlaubertModelIntegrationTest(unittest.TestCase):
model
=
TFFlaubertModel
.
from_pretrained
(
"jplu/tf-flaubert-small-cased"
)
model
=
TFFlaubertModel
.
from_pretrained
(
"jplu/tf-flaubert-small-cased"
)
input_ids
=
tf
.
convert_to_tensor
(
input_ids
=
tf
.
convert_to_tensor
(
[[
0
,
158
,
735
,
2592
,
1424
,
6727
,
82
,
1
]],
dtype
=
tf
.
int32
,
[[
0
,
158
,
735
,
2592
,
1424
,
6727
,
82
,
1
]],
dtype
=
tf
.
int32
,
)
# "J'aime flaubert !"
)
# "J'aime flaubert !"
output
=
model
(
input_ids
)[
0
]
output
=
model
(
input_ids
)[
0
]
...
...
tests/test_modeling_tf_gpt2.py
View file @
a75c64d8
...
@@ -37,7 +37,8 @@ if is_tf_available():
...
@@ -37,7 +37,8 @@ if is_tf_available():
class
TFGPT2ModelTester
:
class
TFGPT2ModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_tf_longformer.py
View file @
a75c64d8
...
@@ -36,7 +36,7 @@ if is_tf_available():
...
@@ -36,7 +36,7 @@ if is_tf_available():
def
shape_list
(
x
):
def
shape_list
(
x
):
"""
"""
copied from transformers.modeling_tf_utils
copied from transformers.modeling_tf_utils
"""
"""
static
=
x
.
shape
.
as_list
()
static
=
x
.
shape
.
as_list
()
dynamic
=
tf
.
shape
(
x
)
dynamic
=
tf
.
shape
(
x
)
...
@@ -45,7 +45,8 @@ if is_tf_available():
...
@@ -45,7 +45,8 @@ if is_tf_available():
class
TFLongformerModelTester
:
class
TFLongformerModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
@@ -228,7 +229,8 @@ class TFLongformerModelTester:
...
@@ -228,7 +229,8 @@ class TFLongformerModelTester:
# global attention mask has to be partly defined
# global attention mask has to be partly defined
# to trace all weights
# to trace all weights
global_attention_mask
=
tf
.
concat
(
global_attention_mask
=
tf
.
concat
(
[
tf
.
zeros_like
(
input_ids
)[:,
:
-
1
],
tf
.
ones_like
(
input_ids
)[:,
-
1
:]],
axis
=-
1
,
[
tf
.
zeros_like
(
input_ids
)[:,
:
-
1
],
tf
.
ones_like
(
input_ids
)[:,
-
1
:]],
axis
=-
1
,
)
)
inputs_dict
=
{
inputs_dict
=
{
...
@@ -267,7 +269,13 @@ class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase):
...
@@ -267,7 +269,13 @@ class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase):
test_torchscript
=
False
test_torchscript
=
False
all_model_classes
=
(
all_model_classes
=
(
(
TFLongformerModel
,
TFLongformerForMaskedLM
,
TFLongformerForQuestionAnswering
,)
if
is_tf_available
()
else
()
(
TFLongformerModel
,
TFLongformerForMaskedLM
,
TFLongformerForQuestionAnswering
,
)
if
is_tf_available
()
else
()
)
)
def
setUp
(
self
):
def
setUp
(
self
):
...
...
tests/test_modeling_tf_openai.py
View file @
a75c64d8
...
@@ -36,7 +36,8 @@ if is_tf_available():
...
@@ -36,7 +36,8 @@ if is_tf_available():
class
TFOpenAIGPTModelTester
:
class
TFOpenAIGPTModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_tf_roberta.py
View file @
a75c64d8
...
@@ -40,7 +40,8 @@ if is_tf_available():
...
@@ -40,7 +40,8 @@ if is_tf_available():
class
TFRobertaModelTester
:
class
TFRobertaModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_tf_t5.py
View file @
a75c64d8
...
@@ -32,7 +32,8 @@ if is_tf_available():
...
@@ -32,7 +32,8 @@ if is_tf_available():
class
TFT5ModelTester
:
class
TFT5ModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
@@ -181,7 +182,10 @@ class TFT5ModelTester:
...
@@ -181,7 +182,10 @@ class TFT5ModelTester:
# append to next input_ids and attn_mask
# append to next input_ids and attn_mask
next_input_ids
=
tf
.
concat
([
input_ids
,
next_tokens
],
axis
=-
1
)
next_input_ids
=
tf
.
concat
([
input_ids
,
next_tokens
],
axis
=-
1
)
attn_mask
=
tf
.
concat
([
attn_mask
,
tf
.
ones
((
attn_mask
.
shape
[
0
],
1
),
dtype
=
tf
.
int32
)],
axis
=
1
,)
attn_mask
=
tf
.
concat
(
[
attn_mask
,
tf
.
ones
((
attn_mask
.
shape
[
0
],
1
),
dtype
=
tf
.
int32
)],
axis
=
1
,
)
# get two different outputs
# get two different outputs
output_from_no_past
=
model
(
next_input_ids
,
attention_mask
=
attn_mask
)[
0
]
output_from_no_past
=
model
(
next_input_ids
,
attention_mask
=
attn_mask
)[
0
]
...
...
tests/test_modeling_tf_transfo_xl.py
View file @
a75c64d8
...
@@ -32,7 +32,8 @@ if is_tf_available():
...
@@ -32,7 +32,8 @@ if is_tf_available():
class
TFTransfoXLModelTester
:
class
TFTransfoXLModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_tf_xlm.py
View file @
a75c64d8
...
@@ -40,7 +40,8 @@ if is_tf_available():
...
@@ -40,7 +40,8 @@ if is_tf_available():
class
TFXLMModelTester
:
class
TFXLMModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
tests/test_modeling_tf_xlnet.py
View file @
a75c64d8
...
@@ -40,7 +40,8 @@ if is_tf_available():
...
@@ -40,7 +40,8 @@ if is_tf_available():
class
TFXLNetModelTester
:
class
TFXLNetModelTester
:
def
__init__
(
def
__init__
(
self
,
parent
,
self
,
parent
,
):
):
self
.
parent
=
parent
self
.
parent
=
parent
self
.
batch_size
=
13
self
.
batch_size
=
13
...
...
Prev
1
…
5
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment