chenpangpang / transformers · Commits · c852036b

Unverified commit c852036b, authored Jun 16, 2020 by Amil Khare, committed via GitHub on Jun 16, 2020.

    [cleanup] Hoist ModelTester objects to top level (#4939)

    Co-authored-by: Sam Shleifer <sshleifer@gmail.com>

Parent: 0c55a384

Changes: 25. Showing 20 changed files with 3360 additions and 3738 deletions (+3360, -3738).
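The commit title describes a mechanical refactor: each per-model ModelTester helper, previously defined as a class nested inside its unittest.TestCase and configured through keyword arguments with defaults, is moved to module (top) level with those settings hard-coded as attributes. A minimal sketch of the before/after shape, using a hypothetical FooModelTester in place of the real per-model classes:

    import unittest


    # Before: the tester lived inside the TestCase and took its settings as keyword defaults.
    class FooModelTestBefore(unittest.TestCase):
        class FooModelTester(object):
            def __init__(self, parent, batch_size=13, seq_length=7):
                self.parent = parent
                self.batch_size = batch_size
                self.seq_length = seq_length

        def setUp(self):
            self.model_tester = FooModelTestBefore.FooModelTester(self)


    # After: the tester is hoisted to module level; the defaults become plain attributes.
    class FooModelTester:
        def __init__(self, parent):
            self.parent = parent
            self.batch_size = 13
            self.seq_length = 7


    class FooModelTestAfter(unittest.TestCase):
        def setUp(self):
            self.model_tester = FooModelTester(self)

The per-file diffs below all follow this shape; only the model classes, attribute values, and create_and_check_* methods differ.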
20 changed files:

    tests/test_modeling_albert.py            +221  -249
    tests/test_modeling_ctrl.py              +135  -157
    tests/test_modeling_distilbert.py         +22   -22
    tests/test_modeling_electra.py           +243  -268
    tests/test_modeling_flaubert.py          +267  -300
    tests/test_modeling_gpt2.py              +264  -264
    tests/test_modeling_longformer.py         +24   -47
    tests/test_modeling_openai.py            +134  -154
    tests/test_modeling_roberta.py           +178  -203
    tests/test_modeling_t5.py                +263  -295
    tests/test_modeling_tf_albert.py         +181  -180
    tests/test_modeling_tf_bert.py           +216  -217
    tests/test_modeling_tf_ctrl.py           +128  -150
    tests/test_modeling_tf_distilbert.py     +123  -144
    tests/test_modeling_tf_electra.py        +133  -156
    tests/test_modeling_tf_gpt2.py           +233  -255
    tests/test_modeling_tf_openai_gpt.py     +150  -172
    tests/test_modeling_tf_roberta.py        +134  -157
    tests/test_modeling_tf_t5.py             +181  -200
    tests/test_modeling_tf_transfo_xl.py     +130  -148
tests/test_modeling_albert.py  (view file @ c852036b)

@@ -37,6 +37,226 @@ if is_torch_available():
Context: the import of ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST from transformers.modeling_albert.
Added, just before the @require_torch class AlbertModelTest(ModelTesterMixin, unittest.TestCase) declaration, a module-level AlbertModelTester:

    class AlbertModelTester:
        def __init__(self, parent):
            self.parent = parent
            self.batch_size = 13
            self.seq_length = 7
            self.is_training = True
            self.use_input_mask = True
            self.use_token_type_ids = True
            self.use_labels = True
            self.vocab_size = 99
            self.embedding_size = 16
            self.hidden_size = 36
            self.num_hidden_layers = 6
            self.num_hidden_groups = 6
            self.num_attention_heads = 6
            self.intermediate_size = 37
            self.hidden_act = "gelu"
            self.hidden_dropout_prob = 0.1
            self.attention_probs_dropout_prob = 0.1
            self.max_position_embeddings = 512
            self.type_vocab_size = 16
            self.type_sequence_label_size = 2
            self.initializer_range = 0.02
            self.num_labels = 3
            self.num_choices = 4
            self.scope = None

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = AlbertConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
                num_hidden_groups=self.num_hidden_groups,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_albert_model(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = AlbertModel(config=config)
            model.to(torch_device)
            model.eval()
            sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
            sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
            sequence_output, pooled_output = model(input_ids)
            result = {
                "sequence_output": sequence_output,
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])

        def create_and_check_albert_for_pretraining(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = AlbertForPreTraining(config=config)
            model.to(torch_device)
            model.eval()
            loss, prediction_scores, sop_scores = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                labels=token_labels,
                sentence_order_label=sequence_labels,
            )
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
                "sop_scores": sop_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(list(result["sop_scores"].size()), [self.batch_size, config.num_labels])
            self.check_loss_output(result)

        def create_and_check_albert_for_masked_lm(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = AlbertForMaskedLM(config=config)
            model.to(torch_device)
            model.eval()
            loss, prediction_scores = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
            )
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.check_loss_output(result)

        def create_and_check_albert_for_question_answering(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = AlbertForQuestionAnswering(config=config)
            model.to(torch_device)
            model.eval()
            loss, start_logits, end_logits = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
            )
            result = {
                "loss": loss,
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
            self.check_loss_output(result)

        def create_and_check_albert_for_sequence_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = AlbertForSequenceClassification(config)
            model.to(torch_device)
            model.eval()
            loss, logits = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels
            )
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
            self.check_loss_output(result)

        def create_and_check_albert_for_token_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = AlbertForTokenClassification(config=config)
            model.to(torch_device)
            model.eval()
            loss, logits = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
            )
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(
                list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
            )
            self.check_loss_output(result)

        def create_and_check_albert_for_multiple_choice(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_choices = self.num_choices
            model = AlbertForMultipleChoice(config=config)
            model.to(torch_device)
            model.eval()
            multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            loss, logits = model(
                multiple_choice_inputs_ids,
                attention_mask=multiple_choice_input_mask,
                token_type_ids=multiple_choice_token_type_ids,
                labels=choice_labels,
            )
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_choices])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
            return config, inputs_dict

@@ -54,256 +274,8 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
Removed, after the closing "else ()" of the all_model_classes tuple: the nested AlbertModelTester(object) class. Its __init__ took the same settings as keyword arguments (parent, batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, use_labels=True, vocab_size=99, embedding_size=16, hidden_size=36, num_hidden_layers=6, num_hidden_groups=6, num_attention_heads=6, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None) and assigned them to attributes; its prepare_config_and_inputs, check_loss_output, create_and_check_* and prepare_config_and_inputs_for_common methods match the hoisted class above (the nested version additionally called self.check_loss_output(result) at the end of create_and_check_albert_for_multiple_choice). In setUp,

    self.model_tester = AlbertModelTest.AlbertModelTester(self)

becomes

    self.model_tester = AlbertModelTester(self)

while self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37) and def test_config(self): remain unchanged.
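The diff does not show the AlbertModelTest test methods themselves; in this test suite they typically fetch inputs from the tester and delegate all checks to it. A sketch of that wiring, assumed because those methods sit outside the changed hunks:

    import unittest

    class AlbertModelTestSketch(unittest.TestCase):
        # Assumed shape of an untouched test method; the real AlbertModelTest methods
        # are not part of this diff.
        def setUp(self):
            self.model_tester = AlbertModelTester(self)

        def test_albert_model(self):
            config_and_inputs = self.model_tester.prepare_config_and_inputs()
            self.model_tester.create_and_check_albert_model(*config_and_inputs)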
tests/test_modeling_ctrl.py  (view file @ c852036b)

@@ -27,6 +27,140 @@ if is_torch_available():
Context: from transformers import CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_LIST, CTRLLMHeadModel.
Added: a module-level CTRLModelTester class, laid out like the hoisted AlbertModelTester above. Its __init__(self, parent) hard-codes batch_size=14, seq_length=7, is_training=True, use_token_type_ids=True, use_input_mask=True, use_labels=True, use_mc_token_ids=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None.

prepare_config_and_inputs builds input_ids, an optional input_mask, token_type_ids, mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length), the usual label tensors, head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2), and

    config = CTRLConfig(
        vocab_size=self.vocab_size,
        n_embd=self.hidden_size,
        n_layer=self.num_hidden_layers,
        n_head=self.num_attention_heads,
        # intermediate_size=self.intermediate_size,
        # hidden_act=self.hidden_act,
        # hidden_dropout_prob=self.hidden_dropout_prob,
        # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
        n_positions=self.max_position_embeddings,
        n_ctx=self.max_position_embeddings
        # type_vocab_size=self.type_vocab_size,
        # initializer_range=self.initializer_range
    )

returning (config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels). check_loss_output asserts a scalar loss; create_and_check_ctrl_model checks that CTRLModel returns a sequence_output of shape [batch_size, seq_length, hidden_size] and that len(presents) == config.n_layer; create_and_check_lm_head_model checks that CTRLLMHeadModel called with labels=input_ids returns a scalar loss and lm_logits of shape [batch_size, seq_length, vocab_size]; prepare_config_and_inputs_for_common returns the config plus an inputs_dict of input_ids, token_type_ids and head_mask.

@@ -37,164 +171,8 @@ class CTRLModelTest(ModelTesterMixin, unittest.TestCase):
Removed, below the test_resize_embeddings = False / test_head_masking = False flags: the nested CTRLModelTester(object) class (same settings taken as keyword arguments, same methods as the hoisted class). In setUp,

    self.model_tester = CTRLModelTest.CTRLModelTester(self)

becomes

    self.model_tester = CTRLModelTester(self)

while self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37) and def test_config(self): remain unchanged.
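Every prepare_config_and_inputs above leans on the ids_tensor helper imported from the shared test utilities. Its real definition is not part of this diff; as an assumption, it behaves roughly like the following stand-in, producing random token ids below the given bound:

    import torch

    def ids_tensor(shape, vocab_size):
        # Hypothetical stand-in for the shared test helper (signature and behaviour assumed):
        # a long tensor of random ids in [0, vocab_size) with the requested shape.
        return torch.randint(low=0, high=vocab_size, size=tuple(shape), dtype=torch.long)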
tests/test_modeling_distilbert.py  (view file @ c852036b)

@@ -34,27 +34,6 @@
Removed from the top of the file (just after the DistilBertForSequenceClassification import): the @require_torch-decorated DistilBertModelTest(ModelTesterMixin, unittest.TestCase) declaration, its all_model_classes tuple (DistilBertModel, DistilBertForMaskedLM, DistilBertForMultipleChoice, DistilBertForQuestionAnswering, DistilBertForSequenceClassification, DistilBertForTokenClassification) if is_torch_available() else None, and its flags test_pruning = True, test_torchscript = True, test_resize_embeddings = True, test_head_masking = True, which previously sat above the DistilBertModelTester(object) class.

@@ -245,8 +224,29 @@
Added after the tester's prepare_config_and_inputs_for_common (which returns config and inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}): the same @require_torch class DistilBertModelTest(ModelTesterMixin, unittest.TestCase) declaration, all_model_classes tuple and flags, now placed below the tester. In setUp,

    self.model_tester = DistilBertModelTest.DistilBertModelTester(self)

becomes

    self.model_tester = DistilBertModelTester(self)

while self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37) and def test_config(self): remain unchanged.
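Beyond readability, a top-level tester can be imported by other test modules, which a class nested inside a TestCase could not be cleanly. A hypothetical illustration of the kind of reuse this enables; the import path is assumed and is not something this commit adds:

    import unittest

    # Hypothetical reuse of the now top-level tester from another test module
    # (import path assumed; not part of this commit).
    from tests.test_modeling_distilbert import DistilBertModelTester


    class SomeDownstreamTest(unittest.TestCase):
        def setUp(self):
            self.model_tester = DistilBertModelTester(self)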
tests/test_modeling_electra.py  (view file @ c852036b)

@@ -36,200 +36,69 @@
Context: the import of ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST from transformers.modeling_electra.
Removed from this position: the @require_torch class ElectraModelTest(ModelTesterMixin, unittest.TestCase) declaration with its all_model_classes tuple (ElectraModel, ElectraForPreTraining, ElectraForMaskedLM, ElectraForTokenClassification, ElectraForSequenceClassification, ElectraForQuestionAnswering) if is_torch_available() else (), together with the nested ElectraModelTester(object) whose __init__ took the settings as keyword arguments.
Added in its place: a module-level ElectraModelTester whose __init__(self, parent) hard-codes batch_size=13, seq_length=7, is_training=True, use_input_mask=True, use_token_type_ids=True, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None. Its prepare_config_and_inputs builds the usual input_ids / input_mask / token_type_ids / label tensors plus fake_token_labels = ids_tensor([self.batch_size, self.seq_length], 1), and

    config = ElectraConfig(
        vocab_size=self.vocab_size,
        hidden_size=self.hidden_size,
        num_hidden_layers=self.num_hidden_layers,
        num_attention_heads=self.num_attention_heads,
        intermediate_size=self.intermediate_size,
        hidden_act=self.hidden_act,
        hidden_dropout_prob=self.hidden_dropout_prob,
        attention_probs_dropout_prob=self.attention_probs_dropout_prob,
        max_position_embeddings=self.max_position_embeddings,
        type_vocab_size=self.type_vocab_size,
        is_decoder=False,
        initializer_range=self.initializer_range,
    )

returning (config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, fake_token_labels).

@@ -238,48 +107,168 @@ and @@ -288,43 +277,29 @@
The check methods move unchanged into the hoisted class: check_loss_output; create_and_check_electra_model (ElectraModel returns (sequence_output,) of shape [batch_size, seq_length, hidden_size]); create_and_check_electra_for_masked_lm (scalar loss, prediction_scores of shape [batch_size, seq_length, vocab_size]); create_and_check_electra_for_token_classification (logits of shape [batch_size, seq_length, num_labels]); create_and_check_electra_for_pretraining (labels=fake_token_labels, logits of shape [batch_size, seq_length]); create_and_check_electra_for_sequence_classification (logits of shape [batch_size, num_labels]); create_and_check_electra_for_question_answering (start_logits and end_logits of shape [batch_size, seq_length]); and prepare_config_and_inputs_for_common (inputs_dict of input_ids, token_type_ids and attention_mask). The @require_torch class ElectraModelTest(ModelTesterMixin, unittest.TestCase) declaration, with the same all_model_classes tuple, is re-added below the tester. In setUp,

    self.model_tester = ElectraModelTest.ElectraModelTester(self)

becomes

    self.model_tester = ElectraModelTester(self)

while self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37) and def test_config(self): remain unchanged.
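Each of these checks compares tensor shapes by converting torch.Size (a tuple subclass) to a plain list before assertListEqual. A minimal, self-contained illustration of that pattern with made-up dimensions:

    import torch

    # Made-up dimensions, only to illustrate the list(tensor.size()) comparison used above.
    logits = torch.zeros(13, 7, 3)            # e.g. [batch_size, seq_length, num_labels]
    assert list(logits.size()) == [13, 7, 3]
    assert isinstance(logits.size(), tuple)   # torch.Size subclasses tuple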
tests/test_modeling_flaubert.py  (view file @ c852036b)

@@ -35,140 +35,81 @@
Context: the import of FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST from transformers.modeling_flaubert.
Removed from this position: the @require_torch class FlaubertModelTest(ModelTesterMixin, unittest.TestCase) declaration with the opening of its all_model_classes tuple (FlaubertModel, FlaubertWithLMHeadModel, FlaubertForQuestionAnswering, FlaubertForQuestionAnsweringSimple, FlaubertForSequenceClassification, ...), together with the nested FlaubertModelTester(object) whose __init__ took the settings as keyword arguments (notably type_vocab_size=16 and use_proj=True).
Added in its place: a module-level FlaubertModelTester(object) whose __init__(self, parent) hard-codes batch_size=13, seq_length=7, is_training=True, use_input_lengths=True, use_token_type_ids=True, use_labels=True, gelu_activation=True, sinusoidal_embeddings=False, causal=False, asm=False, n_langs=2, vocab_size=99, n_special=0, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=12, type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, summary_type="last", use_proj=None, scope=None. Its prepare_config_and_inputs builds input_ids, a float input_mask, an optional input_lengths = ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2 (a small variation of seq_length), token_type_ids drawn over n_langs, sequence/token labels plus a float is_impossible_labels tensor, and

    config = FlaubertConfig(
        vocab_size=self.vocab_size,
        n_special=self.n_special,
        emb_dim=self.hidden_size,
        n_layers=self.num_hidden_layers,
        n_heads=self.num_attention_heads,
        dropout=self.hidden_dropout_prob,
        attention_dropout=self.attention_probs_dropout_prob,
        gelu_activation=self.gelu_activation,
        sinusoidal_embeddings=self.sinusoidal_embeddings,
        asm=self.asm,
        causal=self.causal,
        n_langs=self.n_langs,
        max_position_embeddings=self.max_position_embeddings,
        initializer_range=self.initializer_range,
        summary_type=self.summary_type,
        use_proj=self.use_proj,
    )

returning (config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask).

@@ -177,146 +118,185 @@
The check methods move into the hoisted class: check_loss_output; create_and_check_flaubert_model (FlaubertModel called with lengths=input_lengths and langs=token_type_ids, then with langs only, then with input_ids only; sequence_output of shape [batch_size, seq_length, hidden_size]); create_and_check_flaubert_lm_head (FlaubertWithLMHeadModel with labels=token_labels, scalar loss and logits of shape [batch_size, seq_length, vocab_size]); create_and_check_flaubert_simple_qa (FlaubertForQuestionAnsweringSimple, start_logits and end_logits of shape [batch_size, seq_length]); create_and_check_flaubert_qa (FlaubertForQuestionAnswering called once without labels to unpack start_top_log_probs, start_top_index, end_top_log_probs, end_top_index and cls_logits, then with start_positions, end_positions, cls_index, is_impossible and p_mask, then without p_mask, and finally with only start_positions and end_positions, unpacking (total_loss,) from the labelled calls; the asserted shapes are [batch_size, model.config.start_n_top] for the start tensors, [batch_size, model.config.start_n_top * model.config.end_n_top] for the end tensors, and [batch_size] for cls_logits); and create_and_check_flaubert_sequence_classif (…)
.
end_n_top
],
)
self
.
parent
.
assertListEqual
(
list
(
result
[
"end_top_index"
].
size
()),
[
self
.
batch_size
,
model
.
config
.
start_n_top
*
model
.
config
.
end_n_top
],
)
self
.
parent
.
assertListEqual
(
list
(
result
[
"cls_logits"
].
size
()),
[
self
.
batch_size
])
def
create_and_check_flaubert_sequence_classif
(
self
,
config
,
input_ids
,
token_type_ids
,
input_lengths
,
sequence_labels
,
token_labels
,
is_impossible_labels
,
input_mask
,
):
model
=
FlaubertForSequenceClassification
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
(
logits
,)
=
model
(
input_ids
)
loss
,
logits
=
model
(
input_ids
,
labels
=
sequence_labels
)
result
=
{
"loss"
:
loss
,
"logits"
:
logits
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"loss"
].
size
()),
[])
self
.
parent
.
assertListEqual
(
list
(
result
[
"logits"
].
size
()),
[
self
.
batch_size
,
self
.
type_sequence_label_size
])
def
prepare_config_and_inputs_for_common
(
self
):
config_and_inputs
=
self
.
prepare_config_and_inputs
()
(
config
,
input_ids
,
token_type_ids
,
...
...
@@ -325,41 +305,28 @@ class FlaubertModelTest(ModelTesterMixin, unittest.TestCase):
token_labels
,
is_impossible_labels
,
input_mask
,
):
model
=
FlaubertForSequenceClassification
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
(
logits
,)
=
model
(
input_ids
)
loss
,
logits
=
model
(
input_ids
,
labels
=
sequence_labels
)
result
=
{
"loss"
:
loss
,
"logits"
:
logits
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"loss"
].
size
()),
[])
self
.
parent
.
assertListEqual
(
list
(
result
[
"logits"
].
size
()),
[
self
.
batch_size
,
self
.
type_sequence_label_size
]
)
def
prepare_config_and_inputs_for_common
(
self
):
config_and_inputs
=
self
.
prepare_config_and_inputs
()
(
config
,
input_ids
,
token_type_ids
,
input_lengths
,
sequence_labels
,
token_labels
,
is_impossible_labels
,
input_mask
,
)
=
config_and_inputs
inputs_dict
=
{
"input_ids"
:
input_ids
,
"token_type_ids"
:
token_type_ids
,
"lengths"
:
input_lengths
}
return
config
,
inputs_dict
)
=
config_and_inputs
inputs_dict
=
{
"input_ids"
:
input_ids
,
"token_type_ids"
:
token_type_ids
,
"lengths"
:
input_lengths
}
return
config
,
inputs_dict
@require_torch
class FlaubertModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (
            FlaubertModel,
            FlaubertWithLMHeadModel,
            FlaubertForQuestionAnswering,
            FlaubertForQuestionAnsweringSimple,
            FlaubertForSequenceClassification,
        )
        if is_torch_available()
        else ()
    )

    def setUp(self):
-        self.model_tester = FlaubertModelTest.FlaubertModelTester(self)
+        self.model_tester = FlaubertModelTester(self)
        self.config_tester = ConfigTester(self, config_class=FlaubertConfig, emb_dim=37)

    def test_config(self):
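As an aside (not part of the commit), the test methods hidden behind the collapsed "..." context follow the usual ModelTesterMixin pattern: pair prepare_config_and_inputs with one create_and_check_* call on the hoisted tester. A minimal sketch, assuming it lives inside FlaubertModelTest whose setUp() created self.model_tester above:

# Sketch only: how a test method typically drives the hoisted tester.
def test_flaubert_model(self):
    config_and_inputs = self.model_tester.prepare_config_and_inputs()
    self.model_tester.create_and_check_flaubert_model(*config_and_inputs)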
...
...
tests/test_modeling_gpt2.py View file @ c852036b
...
...
@@ -34,6 +34,269 @@ if is_torch_available():
)
class GPT2ModelTester:
    def __init__(
        self,
        parent,
        batch_size=14,
        seq_length=7,
        is_training=True,
        use_token_type_ids=True,
        use_input_mask=True,
        use_labels=True,
        use_mc_token_ids=True,
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=5,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=16,
        type_sequence_label_size=2,
        initializer_range=0.02,
        num_labels=3,
        num_choices=4,
        scope=None,
    ):
        self.parent = parent
        self.batch_size = 14
        self.seq_length = 7
        self.is_training = True
        self.use_token_type_ids = True
        self.use_input_mask = True
        self.use_labels = True
        self.use_mc_token_ids = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None
        self.bos_token_id = vocab_size - 1
        self.eos_token_id = vocab_size - 1

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        mc_token_ids = None
        if self.use_mc_token_ids:
            mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = GPT2Config(
            vocab_size=self.vocab_size,
            n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers,
            n_head=self.num_attention_heads,
            # intermediate_size=self.intermediate_size,
            # hidden_act=self.hidden_act,
            # hidden_dropout_prob=self.hidden_dropout_prob,
            # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            n_positions=self.max_position_embeddings,
            n_ctx=self.max_position_embeddings,
            # type_vocab_size=self.type_vocab_size,
            # initializer_range=self.initializer_range
            bos_token_id=self.bos_token_id,
            eos_token_id=self.eos_token_id,
        )

        head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

        return (
            config,
            input_ids,
            input_mask,
            head_mask,
            token_type_ids,
            mc_token_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        )

    def check_loss_output(self, result):
        self.parent.assertListEqual(list(result["loss"].size()), [])

    def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = GPT2Model(config=config)
        model.to(torch_device)
        model.eval()

        model(input_ids, token_type_ids=token_type_ids, head_mask=head_mask)
        model(input_ids, token_type_ids=token_type_ids)
        sequence_output, presents = model(input_ids)

        result = {
            "sequence_output": sequence_output,
            "presents": presents,
        }
        self.parent.assertListEqual(
            list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size],
        )
        self.parent.assertEqual(len(result["presents"]), config.n_layer)

    def create_and_check_gpt2_model_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = GPT2Model(config=config)
        model.to(torch_device)
        model.eval()

        # first forward pass
        output, past = model(input_ids, token_type_ids=token_type_ids)

        # create hypothetical next token and extent to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
        next_token_types = ids_tensor([self.batch_size, 1], self.type_vocab_size)

        # append to next input_ids and token_type_ids
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)

        output_from_no_past, _ = model(next_input_ids, token_type_ids=next_token_type_ids)
        output_from_past, _ = model(next_tokens, token_type_ids=next_token_types, past=past)

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()

        # test that outputs are equal for slice
        self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

    def create_and_check_gpt2_model_attention_mask_past(
        self, config, input_ids, input_mask, head_mask, token_type_ids, *args
    ):
        model = GPT2Model(config=config)
        model.to(torch_device)
        model.eval()

        # create attention mask
        attn_mask = torch.ones(input_ids.shape, dtype=torch.long, device=torch_device)
        half_seq_length = self.seq_length // 2
        attn_mask[:, half_seq_length:] = 0

        # first forward pass
        output, past = model(input_ids, attention_mask=attn_mask)

        # create hypothetical next token and extent to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        # change a random masked slice from input_ids
        random_seq_idx_to_change = ids_tensor((1,), half_seq_length).item() + 1
        random_other_next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size).squeeze(-1)
        input_ids[:, -random_seq_idx_to_change] = random_other_next_tokens

        # append to next input_ids and attn_mask
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        attn_mask = torch.cat(
            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1,
        )

        # get two different outputs
        output_from_no_past, _ = model(next_input_ids, attention_mask=attn_mask)
        output_from_past, _ = model(next_tokens, past=past, attention_mask=attn_mask)

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()

        # test that outputs are equal for slice
        self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

    def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = GPT2LMHeadModel(config)
        model.to(torch_device)
        model.eval()

        loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)

        result = {"loss": loss, "lm_logits": lm_logits}

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )

    def create_and_check_double_lm_head_model(
        self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
    ):
        model = GPT2DoubleHeadsModel(config)
        model.to(torch_device)
        model.eval()

        multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()

        inputs = {
            "input_ids": multiple_choice_inputs_ids,
            "mc_token_ids": mc_token_ids,
            "attention_mask": multiple_choice_input_mask,
            "token_type_ids": multiple_choice_token_type_ids,
            "labels": multiple_choice_inputs_ids,
        }

        loss, lm_logits, mc_logits, _ = model(**inputs)

        result = {"loss": loss, "lm_logits": lm_logits, "mc_logits": mc_logits}

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["lm_logits"].size()), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(list(result["mc_logits"].size()), [self.batch_size, self.num_choices])

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()

        (
            config,
            input_ids,
            input_mask,
            head_mask,
            token_type_ids,
            mc_token_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs

        inputs_dict = {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "head_mask": head_mask,
        }

        return config, inputs_dict
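As an aside (not part of the commit), the caching check in create_and_check_gpt2_model_past above boils down to the following self-contained sketch. It uses the same tuple-return and `past` keyword API that these tests call in the transformers version this commit targets, and the config sizes are only illustrative:

# Sketch: feeding one new token with a reused cache must match the uncached full pass.
import torch
from transformers import GPT2Config, GPT2Model

config = GPT2Config(vocab_size=99, n_embd=32, n_layer=2, n_head=4, n_positions=64, n_ctx=64)
model = GPT2Model(config)
model.eval()

input_ids = torch.randint(0, config.vocab_size, (1, 7))
next_token = torch.randint(0, config.vocab_size, (1, 1))

with torch.no_grad():
    _, past = model(input_ids)  # full pass; the "presents" cache is the second output
    full_output, _ = model(torch.cat([input_ids, next_token], dim=-1))  # recompute everything
    cached_output, _ = model(next_token, past=past)  # feed only the new token, reuse the cache

# the hidden state of the new token should match whether or not the cache was used
assert torch.allclose(full_output[:, -1], cached_output[:, 0], atol=1e-3)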
@require_torch
class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
...
...
@@ -42,271 +305,8 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
        (GPT2LMHeadModel,) if is_torch_available() else ()
    )  # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly

    # [removed here: the nested GPT2ModelTester(object) class; its methods match the top-level
    #  GPT2ModelTester hoisted above, except that its __init__ assigned the constructor arguments
    #  instead of hard-coded values]

    def setUp(self):
-        self.model_tester = GPT2ModelTest.GPT2ModelTester(self)
+        self.model_tester = GPT2ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37)

    def test_config(self):
...
...
tests/test_modeling_longformer.py View file @ c852036b
...
...
@@ -36,56 +36,33 @@ if is_torch_available():
)
# [the old __init__ took these same values as keyword arguments on LongformerModelTester(object);
#  this commit hard-codes them]
class LongformerModelTester:
    def __init__(
        self,
        parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None
        self.attention_window = 4
# `ModelTesterMixin.test_attention_outputs` is expecting attention tensors to be of size
# [num_attention_heads, encoder_seq_length, encoder_key_length], but LongformerSelfAttention
...
...
tests/test_modeling_openai.py View file @ c852036b
...
...
@@ -34,6 +34,139 @@ if is_torch_available():
)
class OpenAIGPTModelTester:
    def __init__(
        self,
        parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = OpenAIGPTConfig(
            vocab_size=self.vocab_size,
            n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers,
            n_head=self.num_attention_heads,
            # intermediate_size=self.intermediate_size,
            # hidden_act=self.hidden_act,
            # hidden_dropout_prob=self.hidden_dropout_prob,
            # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            n_positions=self.max_position_embeddings,
            n_ctx=self.max_position_embeddings
            # type_vocab_size=self.type_vocab_size,
            # initializer_range=self.initializer_range
        )

        head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

        return (
            config,
            input_ids,
            head_mask,
            token_type_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        )

    def check_loss_output(self, result):
        self.parent.assertListEqual(list(result["loss"].size()), [])

    def create_and_check_openai_gpt_model(self, config, input_ids, head_mask, token_type_ids, *args):
        model = OpenAIGPTModel(config=config)
        model.to(torch_device)
        model.eval()

        model(input_ids, token_type_ids=token_type_ids, head_mask=head_mask)
        model(input_ids, token_type_ids=token_type_ids)
        (sequence_output,) = model(input_ids)

        result = {"sequence_output": sequence_output}
        self.parent.assertListEqual(
            list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size],
        )

    def create_and_check_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args):
        model = OpenAIGPTLMHeadModel(config)
        model.to(torch_device)
        model.eval()

        loss, lm_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)

        result = {"loss": loss, "lm_logits": lm_logits}

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )

    def create_and_check_double_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args):
        model = OpenAIGPTDoubleHeadsModel(config)
        model.to(torch_device)
        model.eval()

        loss, lm_logits, mc_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)

        result = {"loss": loss, "lm_logits": lm_logits}

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            head_mask,
            token_type_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs

        inputs_dict = {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "head_mask": head_mask,
        }

        return config, inputs_dict
@require_torch
class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase):
...
...
@@ -44,161 +177,8 @@ class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase):
        (OpenAIGPTLMHeadModel,) if is_torch_available() else ()
    )  # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly

    # [removed here: the nested OpenAIGPTModelTester(object) class; its methods match the top-level
    #  OpenAIGPTModelTester hoisted above, except that its __init__ assigned the constructor
    #  arguments instead of hard-coded values]

    def setUp(self):
-        self.model_tester = OpenAIGPTModelTest.OpenAIGPTModelTester(self)
+        self.model_tester = OpenAIGPTModelTester(self)
        self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37)

    def test_config(self):
...
...
tests/test_modeling_roberta.py View file @ c852036b
...
...
@@ -39,6 +39,183 @@ if is_torch_available():
from transformers.modeling_utils import create_position_ids_from_input_ids
class RobertaModelTester:
    def __init__(
        self,
        parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = RobertaConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

    def check_loss_output(self, result):
        self.parent.assertListEqual(list(result["loss"].size()), [])

    def create_and_check_roberta_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = RobertaModel(config=config)
        model.to(torch_device)
        model.eval()
        sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
        sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
        sequence_output, pooled_output = model(input_ids)

        result = {
            "sequence_output": sequence_output,
            "pooled_output": pooled_output,
        }
        self.parent.assertListEqual(
            list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])

    def create_and_check_roberta_for_masked_lm(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = RobertaForMaskedLM(config=config)
        model.to(torch_device)
        model.eval()
        loss, prediction_scores = model(
            input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
        )
        result = {
            "loss": loss,
            "prediction_scores": prediction_scores,
        }
        self.parent.assertListEqual(
            list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
        )
        self.check_loss_output(result)

    def create_and_check_roberta_for_token_classification(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.num_labels = self.num_labels
        model = RobertaForTokenClassification(config=config)
        model.to(torch_device)
        model.eval()
        loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
        result = {
            "loss": loss,
            "logits": logits,
        }
        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])
        self.check_loss_output(result)

    def create_and_check_roberta_for_multiple_choice(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.num_choices = self.num_choices
        model = RobertaForMultipleChoice(config=config)
        model.to(torch_device)
        model.eval()
        multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        loss, logits = model(
            multiple_choice_inputs_ids,
            attention_mask=multiple_choice_input_mask,
            token_type_ids=multiple_choice_token_type_ids,
            labels=choice_labels,
        )
        result = {
            "loss": loss,
            "logits": logits,
        }
        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_choices])
        self.check_loss_output(result)

    def create_and_check_roberta_for_question_answering(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = RobertaForQuestionAnswering(config=config)
        model.to(torch_device)
        model.eval()
        loss, start_logits, end_logits = model(
            input_ids,
            attention_mask=input_mask,
            token_type_ids=token_type_ids,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
        )
        result = {
            "loss": loss,
            "start_logits": start_logits,
            "end_logits": end_logits,
        }
        self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
        self.check_loss_output(result)

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict
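A small aside (not part of the commit): the unsqueeze/expand/contiguous dance in create_and_check_roberta_for_multiple_choice above only reshapes the inputs so that every choice slot sees the same token ids, which is all the shape checks need. A self-contained sketch with illustrative sizes:

import torch

batch_size, seq_length, num_choices = 13, 7, 4
input_ids = torch.randint(0, 99, (batch_size, seq_length))

# (batch, seq) -> (batch, num_choices, seq); contiguous() materialises the expanded view
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, num_choices, -1).contiguous()
assert multiple_choice_inputs_ids.shape == (batch_size, num_choices, seq_length)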
@require_torch
class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
...
...
@@ -55,210 +232,8 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
        else ()
    )

    # [removed here: the nested RobertaModelTester(object) class; its methods match the top-level
    #  RobertaModelTester hoisted above, except that its __init__ assigned the constructor
    #  arguments instead of hard-coded values]

    def setUp(self):
-        self.model_tester = RobertaModelTest.RobertaModelTester(self)
+        self.model_tester = RobertaModelTester(self)
        self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)

    def test_config(self):
...
...
tests/test_modeling_t5.py View file @ c852036b
...
...
@@ -30,6 +30,268 @@ if is_torch_available():
from transformers.tokenization_t5 import T5Tokenizer
class T5ModelTester:
    def __init__(self, parent):
        self.parent = parent
        self.batch_size = 13
        self.encoder_seq_length = 7
        self.decoder_seq_length = 9
        self.is_training = True
        self.use_attention_mask = True
        self.use_labels = True
        self.vocab_size = 99
        self.n_positions = 14
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.d_ff = 37
        self.relative_attention_num_buckets = 8
        self.dropout_rate = 0.1
        self.initializer_factor = 0.002
        self.eos_token_id = 1
        self.pad_token_id = 0
        self.decoder_start_token_id = 0
        self.scope = None

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
        decoder_input_ids = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

        attention_mask = None
        decoder_attention_mask = None
        if self.use_attention_mask:
            attention_mask = ids_tensor([self.batch_size, self.encoder_seq_length], vocab_size=2)
            decoder_attention_mask = ids_tensor([self.batch_size, self.decoder_seq_length], vocab_size=2)

        lm_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

        config = T5Config(
            vocab_size=self.vocab_size,
            n_positions=self.n_positions,
            d_model=self.hidden_size,
            d_ff=self.d_ff,
            d_kv=self.hidden_size // self.num_attention_heads,
            num_layers=self.num_hidden_layers,
            num_heads=self.num_attention_heads,
            relative_attention_num_buckets=self.relative_attention_num_buckets,
            dropout_rate=self.dropout_rate,
            initializer_factor=self.initializer_factor,
            eos_token_id=self.eos_token_id,
            bos_token_id=self.pad_token_id,
            pad_token_id=self.pad_token_id,
            decoder_start_token_id=self.decoder_start_token_id,
        )

        return (
            config,
            input_ids,
            decoder_input_ids,
            attention_mask,
            decoder_attention_mask,
            lm_labels,
        )

    def check_loss_output(self, result):
        self.parent.assertListEqual(list(result["loss"].size()), [])

    def check_prepare_lm_labels_via_shift_left(
        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
    ):
        model = T5Model(config=config)
        model.to(torch_device)
        model.eval()

        # make sure that lm_labels are correctly padded from the right
        lm_labels.masked_fill_((lm_labels == self.decoder_start_token_id), self.eos_token_id)

        # add casaul pad token mask
        triangular_mask = torch.tril(lm_labels.new_ones(lm_labels.shape)).logical_not()
        lm_labels.masked_fill_(triangular_mask, self.pad_token_id)
        decoder_input_ids = model._shift_right(lm_labels)

        for i, (decoder_input_ids_slice, lm_labels_slice) in enumerate(zip(decoder_input_ids, lm_labels)):
            # first item
            self.parent.assertEqual(decoder_input_ids_slice[0].item(), self.decoder_start_token_id)
            if i < decoder_input_ids_slice.shape[-1]:
                if i < decoder_input_ids.shape[-1] - 1:
                    # items before diagonal
                    self.parent.assertListEqual(
                        decoder_input_ids_slice[1 : i + 1].tolist(), lm_labels_slice[:i].tolist()
                    )
                # pad items after diagonal
                if i < decoder_input_ids.shape[-1] - 2:
                    self.parent.assertListEqual(
                        decoder_input_ids_slice[i + 2 :].tolist(), lm_labels_slice[i + 1 : -1].tolist()
                    )
            else:
                # all items after square
                self.parent.assertListEqual(decoder_input_ids_slice[1:].tolist(), lm_labels_slice[:-1].tolist())
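A toy illustration (not from the diff) of the convention check_prepare_lm_labels_via_shift_left verifies: T5's _shift_right turns the labels into decoder inputs by prepending decoder_start_token_id and dropping the last label.

lm_labels = [5, 6, 7, 1]          # 1 plays the role of eos_token_id in this tester
decoder_start_token_id = 0        # the tester sets decoder_start_token_id = pad_token_id = 0
decoder_input_ids = [decoder_start_token_id] + lm_labels[:-1]
assert decoder_input_ids == [0, 5, 6, 7]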
    def create_and_check_t5_model(
        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
    ):
        model = T5Model(config=config)
        model.to(torch_device)
        model.eval()

        decoder_output, decoder_past, encoder_output = model(
            input_ids=input_ids,
            decoder_input_ids=decoder_input_ids,
            attention_mask=attention_mask,
            decoder_attention_mask=decoder_attention_mask,
        )
        decoder_output, decoder_past, encoder_output = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

        result = {
            "encoder_output": encoder_output,
            "decoder_output": decoder_output,
            "decoder_past": decoder_past,
        }
        self.parent.assertListEqual(
            list(result["encoder_output"].size()), [self.batch_size, self.encoder_seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(
            list(result["decoder_output"].size()), [self.batch_size, self.decoder_seq_length, self.hidden_size]
        )
        self.parent.assertEqual(len(decoder_past), 2)
        # decoder_past[0] should correspond to encoder output
        self.parent.assertTrue(torch.all(decoder_past[0][0] == encoder_output))
        # There should be `num_layers` key value embeddings stored in decoder_past[1]
        self.parent.assertEqual(len(decoder_past[1]), config.num_layers)
        # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple
        self.parent.assertEqual(len(decoder_past[1][0]), 4)
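    # Illustrative sketch (added for exposition, not part of this commit): how the nested
    # `decoder_past` structure checked above could be unpacked. The names are assumptions based
    # only on the assertions in create_and_check_t5_model, not on the modeling code itself.
    def _example_unpack_decoder_past(self, decoder_past):
        encoder_part, layer_caches = decoder_past  # len(decoder_past) == 2; encoder_part[0] is the encoder output
        for layer_cache in layer_caches:  # one 4-tuple per decoder layer
            self_attn_key, self_attn_value, cross_attn_key, cross_attn_value = layer_cache
        return encoder_part[0]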
    def create_and_check_t5_with_lm_head(
        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
    ):
        model = T5ForConditionalGeneration(config=config)
        model.to(torch_device)
        model.eval()

        outputs = model(
            input_ids=input_ids,
            decoder_input_ids=decoder_input_ids,
            decoder_attention_mask=decoder_attention_mask,
            labels=lm_labels,
        )
        loss, prediction_scores, _, _ = outputs
        self.parent.assertEqual(len(outputs), 4)
        result = {
            "loss": loss,
            "prediction_scores": prediction_scores,
        }
        self.parent.assertListEqual(
            list(result["prediction_scores"].size()), [self.batch_size, self.decoder_seq_length, self.vocab_size]
        )
        self.check_loss_output(result)
    def create_and_check_t5_decoder_model_past(
        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
    ):
        model = T5Model(config=config).get_decoder()
        model.to(torch_device)
        model.eval()

        # first forward pass
        output, past_key_value_states = model(input_ids, use_cache=True)

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        # append to next input_ids
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)

        output_from_no_past = model(next_input_ids)[0]
        output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0]

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()

        # test that outputs are equal for slice
        self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))
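    # Illustrative sketch (added for exposition, not part of this commit): the equivalence the
    # check above verifies, stated directly. Feeding the full sequence at once and feeding only
    # the newest token together with the cached key/value states should produce the same hidden
    # state for that position. `decoder` is assumed to behave like the one used above.
    def _example_past_equivalence(self, decoder, input_ids, next_tokens):
        full_output = decoder(torch.cat([input_ids, next_tokens], dim=-1))[0][:, -1]
        _, cache = decoder(input_ids, use_cache=True)
        cached_output = decoder(next_tokens, past_key_value_states=cache)[0][:, 0]
        return torch.allclose(full_output, cached_output, atol=1e-3)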
    def create_and_check_t5_decoder_model_attention_mask_past(
        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
    ):
        model = T5Model(config=config).get_decoder()
        model.to(torch_device)
        model.eval()

        # create attention mask
        attn_mask = torch.ones(input_ids.shape, dtype=torch.long, device=torch_device)

        half_seq_length = input_ids.shape[-1] // 2
        attn_mask[:, half_seq_length:] = 0

        # first forward pass
        output, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True)

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        # change a random masked slice from input_ids
        random_seq_idx_to_change = ids_tensor((1,), half_seq_length).item() + 1
        random_other_next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size).squeeze(-1)
        input_ids[:, -random_seq_idx_to_change] = random_other_next_tokens

        # append to next input_ids and attn_mask
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        attn_mask = torch.cat(
            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1,
        )

        # get two different outputs
        output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
        output_from_past = model(next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask)[0]

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()

        # test that outputs are equal for slice
        self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))
    def create_t5_and_check_t5_generate_with_past_key_value_states(
        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
    ):
        model = T5ForConditionalGeneration(config=config)
        model.to(torch_device)
        model.eval()
        torch.manual_seed(0)
        output_without_past_cache = model.generate(
            input_ids[:1], num_beams=2, max_length=5, do_sample=True, use_cache=False
        )
        torch.manual_seed(0)
        output_with_past_cache = model.generate(input_ids[:1], num_beams=2, max_length=5, do_sample=True)
        self.parent.assertTrue(torch.all(output_with_past_cache == output_without_past_cache))
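    # Illustrative sketch (added for exposition, not part of this commit): the same seeded-sampling
    # comparison written as a reusable helper. Because both calls re-seed the RNG identically, any
    # difference in the generated ids can only come from the use_cache code path. The generation
    # arguments are assumptions chosen to mirror the check above.
    def _example_cache_equivalence_of_generate(self, model, input_ids):
        torch.manual_seed(0)
        without_cache = model.generate(input_ids[:1], num_beams=2, max_length=5, do_sample=True, use_cache=False)
        torch.manual_seed(0)
        with_cache = model.generate(input_ids[:1], num_beams=2, max_length=5, do_sample=True)
        return bool(torch.all(with_cache == without_cache))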
    def create_and_check_t5_model_fp16_forward(
        self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
    ):
        model = T5Model(config=config)
        model.to(torch_device)
        model.half()
        model.eval()
        output = model(input_ids, decoder_input_ids=input_ids, attention_mask=attention_mask)[0]
        self.parent.assertFalse(torch.isnan(output).any().item())
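    # Illustrative sketch (added for exposition, not part of this commit): the NaN check used
    # above pulled out as a standalone helper, usable on any tensor produced under half precision.
    def _example_assert_no_nan(self, tensor):
        self.parent.assertFalse(torch.isnan(tensor).any().item())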
    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            decoder_input_ids,
            attention_mask,
            decoder_attention_mask,
            lm_labels,
        ) = config_and_inputs

        inputs_dict = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "decoder_input_ids": decoder_input_ids,
            "decoder_attention_mask": decoder_attention_mask,
            "use_cache": False,
        }
        return config, inputs_dict
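    # Illustrative sketch (added for exposition, not part of this commit): how a test method in the
    # T5ModelTest class below typically drives this tester. The method name is an assumption; the
    # pattern (prepare the config and inputs, then call a create_and_check_* helper) is the one the
    # hoisted tester is designed around.
    #
    # def test_t5_model(self):
    #     config_and_inputs = self.model_tester.prepare_config_and_inputs()
    #     self.model_tester.create_and_check_t5_model(*config_and_inputs)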
@require_torch
class T5ModelTest(ModelTesterMixin, unittest.TestCase):
...
...
@@ -40,302 +302,8 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
    test_resize_embeddings = False
    is_encoder_decoder = True
    class T5ModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            encoder_seq_length=7,
            decoder_seq_length=9,
            is_training=True,
            use_attention_mask=True,
            use_labels=True,
            vocab_size=99,
            n_positions=14,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            d_ff=37,
            relative_attention_num_buckets=8,
            dropout_rate=0.1,
            initializer_factor=0.002,
            eos_token_id=1,
            pad_token_id=0,
            decoder_start_token_id=0,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.encoder_seq_length = encoder_seq_length
            self.decoder_seq_length = decoder_seq_length
            self.is_training = is_training
            self.use_attention_mask = use_attention_mask
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.n_positions = n_positions
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.d_ff = d_ff
            self.relative_attention_num_buckets = relative_attention_num_buckets
            self.dropout_rate = dropout_rate
            self.initializer_factor = initializer_factor
            self.scope = scope
            self.eos_token_id = eos_token_id
            self.pad_token_id = pad_token_id
            self.decoder_start_token_id = decoder_start_token_id
        # ... (remaining methods of the removed nested T5ModelTester were identical to those of the hoisted T5ModelTester above) ...
    def setUp(self):
        self.model_tester = T5ModelTest.T5ModelTester(self)
        self.model_tester = T5ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)

    def test_config(self):
...
...
tests/test_modeling_tf_albert.py View file @ c852036b
...
...
@@ -34,6 +34,186 @@ if is_tf_available():
    )
class TFAlbertModelTester:
    def __init__(
        self,
        parent,
        batch_size=13,
        seq_length=7,
        is_training=True,
        use_input_mask=True,
        use_token_type_ids=True,
        use_labels=True,
        vocab_size=99,
        embedding_size=16,
        hidden_size=32,
        num_hidden_layers=5,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=16,
        type_sequence_label_size=2,
        initializer_range=0.02,
        num_labels=3,
        num_choices=4,
        scope=None,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.vocab_size = 99
        self.embedding_size = 16
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = AlbertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    def create_and_check_albert_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFAlbertModel(config=config)
        # inputs = {'input_ids': input_ids,
        #           'attention_mask': input_mask,
        #           'token_type_ids': token_type_ids}
        # sequence_output, pooled_output = model(**inputs)

        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        sequence_output, pooled_output = model(inputs)

        inputs = [input_ids, input_mask]
        sequence_output, pooled_output = model(inputs)

        sequence_output, pooled_output = model(input_ids)

        result = {
            "sequence_output": sequence_output.numpy(),
            "pooled_output": pooled_output.numpy(),
        }
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
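    # Illustrative sketch (added for exposition, not part of this commit): the three call
    # conventions exercised above, shown with toy tensors. The TF models in these tests accept a
    # dict keyed by input name, a positional list, or a bare input_ids tensor. The tensor values
    # below are invented for the example.
    def _example_tf_call_conventions(self, model):
        import tensorflow as tf

        input_ids = tf.constant([[31, 51, 99], [15, 5, 0]])
        attention_mask = tf.constant([[1, 1, 1], [1, 1, 0]])
        outputs_from_dict = model({"input_ids": input_ids, "attention_mask": attention_mask})
        outputs_from_list = model([input_ids, attention_mask])
        outputs_from_tensor = model(input_ids)
        return outputs_from_dict, outputs_from_list, outputs_from_tensor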
    def create_and_check_albert_for_pretraining(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.num_labels = self.num_labels
        model = TFAlbertForPreTraining(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        prediction_scores, sop_scores = model(inputs)
        result = {
            "prediction_scores": prediction_scores.numpy(),
            "sop_scores": sop_scores.numpy(),
        }
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
        self.parent.assertListEqual(list(result["sop_scores"].shape), [self.batch_size, self.num_labels])
    def create_and_check_albert_for_masked_lm(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFAlbertForMaskedLM(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (prediction_scores,) = model(inputs)
        result = {
            "prediction_scores": prediction_scores.numpy(),
        }
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
    def create_and_check_albert_for_sequence_classification(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.num_labels = self.num_labels
        model = TFAlbertForSequenceClassification(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (logits,) = model(inputs)
        result = {
            "logits": logits.numpy(),
        }
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
    def create_and_check_albert_for_question_answering(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFAlbertForQuestionAnswering(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        start_logits, end_logits = model(inputs)
        result = {
            "start_logits": start_logits.numpy(),
            "end_logits": end_logits.numpy(),
        }
        self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict
@require_tf
class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
...
...
@@ -49,187 +229,8 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
        else ()
    )
TFAlbertModelTester
(
object
):
def
__init__
(
self
,
parent
,
batch_size
=
13
,
seq_length
=
7
,
is_training
=
True
,
use_input_mask
=
True
,
use_token_type_ids
=
True
,
use_labels
=
True
,
vocab_size
=
99
,
embedding_size
=
16
,
hidden_size
=
32
,
num_hidden_layers
=
5
,
num_attention_heads
=
4
,
intermediate_size
=
37
,
hidden_act
=
"gelu"
,
hidden_dropout_prob
=
0.1
,
attention_probs_dropout_prob
=
0.1
,
max_position_embeddings
=
512
,
type_vocab_size
=
16
,
type_sequence_label_size
=
2
,
initializer_range
=
0.02
,
num_labels
=
3
,
num_choices
=
4
,
scope
=
None
,
):
self
.
parent
=
parent
self
.
batch_size
=
batch_size
self
.
seq_length
=
seq_length
self
.
is_training
=
is_training
self
.
use_input_mask
=
use_input_mask
self
.
use_token_type_ids
=
use_token_type_ids
self
.
use_labels
=
use_labels
self
.
vocab_size
=
vocab_size
self
.
embedding_size
=
embedding_size
self
.
hidden_size
=
hidden_size
self
.
num_hidden_layers
=
num_hidden_layers
self
.
num_attention_heads
=
num_attention_heads
self
.
intermediate_size
=
intermediate_size
self
.
hidden_act
=
hidden_act
self
.
hidden_dropout_prob
=
hidden_dropout_prob
self
.
attention_probs_dropout_prob
=
attention_probs_dropout_prob
self
.
max_position_embeddings
=
max_position_embeddings
self
.
type_vocab_size
=
type_vocab_size
self
.
type_sequence_label_size
=
type_sequence_label_size
self
.
initializer_range
=
initializer_range
self
.
num_labels
=
num_labels
self
.
num_choices
=
num_choices
self
.
scope
=
scope
        # ... (remaining methods of the removed nested TFAlbertModelTester were identical to those of the hoisted TFAlbertModelTester above) ...
    def setUp(self):
        self.model_tester = TFAlbertModelTest.TFAlbertModelTester(self)
        self.model_tester = TFAlbertModelTester(self)
        self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37)

    def test_config(self):
...
...
tests/test_modeling_tf_bert.py View file @ c852036b
...
...
@@ -37,6 +37,221 @@ if is_tf_available():
    )
class TFBertModelTester:
    def __init__(
        self,
        parent,
        batch_size=13,
        seq_length=7,
        is_training=True,
        use_input_mask=True,
        use_token_type_ids=True,
        use_labels=True,
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=5,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=16,
        type_sequence_label_size=2,
        initializer_range=0.02,
        num_labels=3,
        num_choices=4,
        scope=None,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = BertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    def create_and_check_bert_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFBertModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        sequence_output, pooled_output = model(inputs)

        inputs = [input_ids, input_mask]
        sequence_output, pooled_output = model(inputs)

        sequence_output, pooled_output = model(input_ids)

        result = {
            "sequence_output": sequence_output.numpy(),
            "pooled_output": pooled_output.numpy(),
        }
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
    def create_and_check_bert_for_masked_lm(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFBertForMaskedLM(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (prediction_scores,) = model(inputs)
        result = {
            "prediction_scores": prediction_scores.numpy(),
        }
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
    def create_and_check_bert_for_next_sequence_prediction(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFBertForNextSentencePrediction(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (seq_relationship_score,) = model(inputs)
        result = {
            "seq_relationship_score": seq_relationship_score.numpy(),
        }
        self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])
    def create_and_check_bert_for_pretraining(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFBertForPreTraining(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        prediction_scores, seq_relationship_score = model(inputs)
        result = {
            "prediction_scores": prediction_scores.numpy(),
            "seq_relationship_score": seq_relationship_score.numpy(),
        }
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
        self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])
    def create_and_check_bert_for_sequence_classification(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.num_labels = self.num_labels
        model = TFBertForSequenceClassification(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (logits,) = model(inputs)
        result = {
            "logits": logits.numpy(),
        }
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
    def create_and_check_bert_for_multiple_choice(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.num_choices = self.num_choices
        model = TFBertForMultipleChoice(config=config)
        multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
        multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
        multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
        inputs = {
            "input_ids": multiple_choice_inputs_ids,
            "attention_mask": multiple_choice_input_mask,
            "token_type_ids": multiple_choice_token_type_ids,
        }
        (logits,) = model(inputs)
        result = {
            "logits": logits.numpy(),
        }
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
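    # Illustrative sketch (added for exposition, not part of this commit): the reshaping used above
    # for multiple choice, with the shapes spelled out. expand_dims adds a choices axis and tile
    # repeats each sequence once per choice, so a (batch, seq_len) tensor becomes
    # (batch, num_choices, seq_len). The values below are invented for the example.
    def _example_multiple_choice_shapes(self):
        input_ids = tf.constant([[7, 8, 9]])      # shape (1, 3)
        expanded = tf.expand_dims(input_ids, 1)   # shape (1, 1, 3)
        tiled = tf.tile(expanded, (1, 4, 1))      # shape (1, 4, 3) when num_choices == 4
        return tiled.shape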
    def create_and_check_bert_for_token_classification(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.num_labels = self.num_labels
        model = TFBertForTokenClassification(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (logits,) = model(inputs)
        result = {
            "logits": logits.numpy(),
        }
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
    def create_and_check_bert_for_question_answering(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFBertForQuestionAnswering(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        start_logits, end_logits = model(inputs)
        result = {
            "start_logits": start_logits.numpy(),
            "end_logits": end_logits.numpy(),
        }
        self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict
@require_tf
class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
...
...
@@ -55,224 +270,8 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
        else ()
    )
    class TFBertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
        # ... (remaining methods of the removed nested TFBertModelTester were identical to those of the hoisted TFBertModelTester above) ...
    def setUp(self):
        self.model_tester = TFBertModelTest.TFBertModelTester(self)
        self.model_tester = TFBertModelTester(self)
        self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)

    def test_config(self):
...
...
tests/test_modeling_tf_ctrl.py View file @ c852036b
...
...
@@ -28,163 +28,141 @@ if is_tf_available():
    from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST
class TFCTRLModelTester(object):
    def __init__(
        self, parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_token_type_ids = True
        self.use_input_mask = True
        self.use_labels = True
        self.use_mc_token_ids = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        mc_token_ids = None
        if self.use_mc_token_ids:
            mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = CTRLConfig(
            vocab_size=self.vocab_size,
            n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers,
            n_head=self.num_attention_heads,
            # intermediate_size=self.intermediate_size,
            # hidden_act=self.hidden_act,
            # hidden_dropout_prob=self.hidden_dropout_prob,
            # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            n_positions=self.max_position_embeddings,
            n_ctx=self.max_position_embeddings
            # type_vocab_size=self.type_vocab_size,
            # initializer_range=self.initializer_range
        )

        head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

        return (
            config,
            input_ids,
            input_mask,
            head_mask,
            token_type_ids,
            mc_token_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        )
    def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = TFCTRLModel(config=config)

        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        sequence_output = model(inputs)[0]

        inputs = [input_ids, None, input_mask]  # None is the input for 'past'
        sequence_output = model(inputs)[0]

        sequence_output = model(input_ids)[0]

        result = {
            "sequence_output": sequence_output.numpy(),
        }
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )
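    # Illustrative sketch (added for exposition, not part of this commit): why the positional list
    # above is [input_ids, None, input_mask]. When the model is called with a positional list, the
    # second slot is the `past` cache, so None is passed explicitly in order to reach the attention
    # mask in the third slot. The tensor values below are invented for the example.
    def _example_positional_past_slot(self, model):
        import tensorflow as tf

        input_ids = tf.constant([[31, 51, 99]])
        attention_mask = tf.constant([[1, 1, 1]])
        outputs = model([input_ids, None, attention_mask])  # past=None, attention_mask in the third slot
        return outputs[0]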
    def create_and_check_ctrl_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = TFCTRLLMHeadModel(config=config)

        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        prediction_scores = model(inputs)[0]
        result = {
            "prediction_scores": prediction_scores.numpy(),
        }
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()

        (
            config,
            input_ids,
            input_mask,
            head_mask,
            token_type_ids,
            mc_token_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs

        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict
@require_tf
class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else ()
    all_generative_model_classes = (TFCTRLLMHeadModel,) if is_tf_available() else ()
    class TFCTRLModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
            use_input_mask=True,
            use_labels=True,
            use_mc_token_ids=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_token_type_ids = use_token_type_ids
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.use_mc_token_ids = use_mc_token_ids
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
        # ... (remaining methods of the removed nested TFCTRLModelTester were identical to those of the hoisted TFCTRLModelTester above) ...
    def setUp(self):
        self.model_tester = TFCTRLModelTest.TFCTRLModelTester(self)  # removed line (nested tester)
        self.model_tester = TFCTRLModelTester(self)  # added line (hoisted tester)
        self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37)

    def test_config(self):
...
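The hunk above shows the shape of the change repeated for every model in this commit: the `*ModelTester` helper used to be a class nested inside the `unittest.TestCase` (instantiated as `TFCTRLModelTest.TFCTRLModelTester(self)`), and is now a module-level class that `setUp` builds directly. A minimal self-contained sketch of that structure, using invented dummy names rather than the real transformers/TensorFlow classes:

import unittest


class _DummyModelTester:
    """Stand-in for a hoisted *ModelTester: holds shared sizes and builds fake inputs."""

    def __init__(self, parent):
        self.parent = parent  # the TestCase, used for assertions
        self.batch_size = 13
        self.seq_length = 7

    def prepare_config_and_inputs(self):
        # a real tester would return (config, input_ids, masks, labels, ...)
        return {"batch_size": self.batch_size, "seq_length": self.seq_length}

    def check_shapes(self, inputs):
        self.parent.assertEqual(inputs["batch_size"], 13)
        self.parent.assertEqual(inputs["seq_length"], 7)


class DummyModelTest(unittest.TestCase):
    def setUp(self):
        # after the hoist: instantiate the module-level tester directly
        self.model_tester = _DummyModelTester(self)

    def test_shapes(self):
        self.model_tester.check_shapes(self.model_tester.prepare_config_and_inputs())


if __name__ == "__main__":
    unittest.main()

Because the tester is now importable on its own, other test modules can reuse it without going through the TestCase that owns it.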
tests/test_modeling_tf_distilbert.py  View file @ c852036b
...
@@ -32,6 +32,128 @@ if is_tf_available():
)


class TFDistilBertModelTester:
    def __init__(self, parent,):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_token_type_ids = False
        self.use_labels = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = DistilBertConfig(
            vocab_size=self.vocab_size, dim=self.hidden_size, n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads, hidden_dim=self.intermediate_size, hidden_act=self.hidden_act,
            dropout=self.hidden_dropout_prob, attention_dropout=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings, initializer_range=self.initializer_range,
        )

        return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

    def create_and_check_distilbert_model(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = TFDistilBertModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask}
        outputs = model(inputs)
        sequence_output = outputs[0]

        inputs = [input_ids, input_mask]
        (sequence_output,) = model(inputs)

        result = {"sequence_output": sequence_output.numpy()}
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )

    def create_and_check_distilbert_for_masked_lm(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = TFDistilBertForMaskedLM(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask}
        (prediction_scores,) = model(inputs)
        result = {"prediction_scores": prediction_scores.numpy()}
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )

    def create_and_check_distilbert_for_question_answering(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = TFDistilBertForQuestionAnswering(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask}
        start_logits, end_logits = model(inputs)
        result = {"start_logits": start_logits.numpy(), "end_logits": end_logits.numpy()}
        self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])

    def create_and_check_distilbert_for_sequence_classification(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels):
        config.num_labels = self.num_labels
        model = TFDistilBertForSequenceClassification(config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask}
        (logits,) = model(inputs)
        result = {"logits": logits.numpy()}
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
        return config, inputs_dict
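Each hoisted tester ends with `prepare_config_and_inputs_for_common`, which repacks the full tuple into a `(config, inputs_dict)` pair; the shared `TFModelTesterMixin` tests only ever consume that pair. A rough sketch of the hand-off with made-up stand-in classes (none of these names are transformers APIs):

# Invented stand-ins (FakeConfig / FakeModel / FakeTester are not transformers classes);
# they only illustrate the (config, inputs_dict) contract consumed by the common tests.
class FakeConfig:
    hidden_size = 32


class FakeModel:
    def __init__(self, config):
        self.config = config

    def __call__(self, inputs):
        batch, seq = len(inputs["input_ids"]), len(inputs["input_ids"][0])
        return [batch, seq, self.config.hidden_size]  # stands in for the hidden-states shape


class FakeTester:
    def prepare_config_and_inputs_for_common(self):
        inputs_dict = {"input_ids": [[1, 2, 3]] * 4, "attention_mask": [[1, 1, 1]] * 4}
        return FakeConfig(), inputs_dict


def run_common_shape_check(model_tester, model_classes):
    # roughly what a mixin test does: unpack the pair and call every registered class on inputs_dict
    config, inputs_dict = model_tester.prepare_config_and_inputs_for_common()
    for model_class in model_classes:
        assert model_class(config)(inputs_dict)[-1] == config.hidden_size


run_common_shape_check(FakeTester(), (FakeModel,))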
@require_tf
class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
...
@@ -50,151 +172,8 @@ class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
    test_resize_embeddings = True
    test_head_masking = True

    class TFDistilBertModelTester(object):
        def __init__(
            self, parent, batch_size=13, seq_length=7, is_training=True, use_input_mask=True,
            use_token_type_ids=False, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5,
            num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16,
            type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
        # (prepare_config_and_inputs, the create_and_check_distilbert_* checks and
        # prepare_config_and_inputs_for_common of this removed inner class are verbatim
        # copies of the hoisted TFDistilBertModelTester above.)
    def setUp(self):
        self.model_tester = TFDistilBertModelTest.TFDistilBertModelTester(self)  # removed line (nested tester)
        self.model_tester = TFDistilBertModelTester(self)  # added line (hoisted tester)
        self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37)

    def test_config(self):
...
tests/test_modeling_tf_electra.py  View file @ c852036b
...
@@ -32,6 +32,138 @@ if is_tf_available():
)


class TFElectraModelTester:
    def __init__(self, parent,):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = ElectraConfig(
            vocab_size=self.vocab_size, hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size, hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings, type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

    def create_and_check_electra_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = TFElectraModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (sequence_output,) = model(inputs)

        inputs = [input_ids, input_mask]
        (sequence_output,) = model(inputs)

        (sequence_output,) = model(input_ids)

        result = {"sequence_output": sequence_output.numpy()}
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )

    def create_and_check_electra_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = TFElectraForMaskedLM(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (prediction_scores,) = model(inputs)
        result = {"prediction_scores": prediction_scores.numpy()}
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )

    def create_and_check_electra_for_pretraining(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = TFElectraForPreTraining(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (prediction_scores,) = model(inputs)
        result = {"prediction_scores": prediction_scores.numpy()}
        self.parent.assertListEqual(list(result["prediction_scores"].shape), [self.batch_size, self.seq_length])

    def create_and_check_electra_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
        config.num_labels = self.num_labels
        model = TFElectraForTokenClassification(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (logits,) = model(inputs)
        result = {"logits": logits.numpy()}
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels,) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict
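All of these testers fabricate their inputs with the test helper `ids_tensor(shape, vocab_size)`: a tensor of the requested shape filled with random ids in `[0, vocab_size)`. A pure-Python approximation of that contract (the real helper returns a `tf.Tensor`; only the shape and the value range matter to the checks above):

import random


def ids_tensor(shape, vocab_size, rng=None):
    """Nested lists of random ids in [0, vocab_size), mirroring the helper's contract."""
    rng = rng or random.Random(0)

    def build(dims):
        if len(dims) == 1:
            return [rng.randrange(vocab_size) for _ in range(dims[0])]
        return [build(dims[1:]) for _ in range(dims[0])]

    return build(list(shape))


# e.g. a (13, 7) batch of token ids drawn from a 99-word vocabulary:
fake_input_ids = ids_tensor([13, 7], 99)
assert len(fake_input_ids) == 13 and len(fake_input_ids[0]) == 7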
@require_tf
class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
...
@@ -41,163 +173,8 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
        else ()
    )

    class TFElectraModelTester(object):
        def __init__(
            self, parent, batch_size=13, seq_length=7, is_training=True, use_input_mask=True,
            use_token_type_ids=True, use_labels=True, vocab_size=99, hidden_size=32, num_hidden_layers=5,
            num_attention_heads=4, intermediate_size=37, hidden_act="gelu", hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=16,
            type_sequence_label_size=2, initializer_range=0.02, num_labels=3, num_choices=4, scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
        # (prepare_config_and_inputs, the create_and_check_electra_* checks and
        # prepare_config_and_inputs_for_common of this removed inner class are verbatim
        # copies of the hoisted TFElectraModelTester above.)
    def setUp(self):
        self.model_tester = TFElectraModelTest.TFElectraModelTester(self)  # removed line (nested tester)
        self.model_tester = TFElectraModelTester(self)  # added line (hoisted tester)
        self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37)

    def test_config(self):
...
tests/test_modeling_tf_gpt2.py  View file @ c852036b
...
@@ -34,268 +34,246 @@ if is_tf_available():
)


class TFGPT2ModelTester:
    def __init__(self, parent,):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_token_type_ids = True
        self.use_input_mask = True
        self.use_labels = True
        self.use_mc_token_ids = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None
        self.bos_token_id = self.vocab_size - 1
        self.eos_token_id = self.vocab_size - 1

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        mc_token_ids = None
        if self.use_mc_token_ids:
            mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = GPT2Config(
            vocab_size=self.vocab_size, n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers, n_head=self.num_attention_heads,
            # intermediate_size=self.intermediate_size,
            # hidden_act=self.hidden_act,
            # hidden_dropout_prob=self.hidden_dropout_prob,
            # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            n_positions=self.max_position_embeddings,
            n_ctx=self.max_position_embeddings,
            # type_vocab_size=self.type_vocab_size,
            # initializer_range=self.initializer_range
            bos_token_id=self.bos_token_id,
            eos_token_id=self.eos_token_id,
        )

        head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

        return (config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids,
                sequence_labels, token_labels, choice_labels)

    def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = TFGPT2Model(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        sequence_output = model(inputs)[0]

        inputs = [input_ids, None, input_mask]  # None is the input for 'past'
        sequence_output = model(inputs)[0]

        sequence_output = model(input_ids)[0]

        result = {"sequence_output": sequence_output.numpy()}
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size],
        )

    def create_and_check_gpt2_model_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = TFGPT2Model(config=config)

        # first forward pass
        output, past = model(input_ids, token_type_ids=token_type_ids)

        # create hypothetical next token and extent to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
        next_token_types = ids_tensor([self.batch_size, 1], self.type_vocab_size)

        # append to next input_ids and token_type_ids
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
        next_token_type_ids = tf.concat([token_type_ids, next_token_types], axis=-1)

        output_from_no_past, _ = model(next_input_ids, token_type_ids=next_token_type_ids)
        output_from_past, _ = model(next_tokens, token_type_ids=next_token_types, past=past)

        # select random slice
        random_slice_idx = int(ids_tensor((1,), shape_list(output_from_past)[-1]))
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
        output_from_past_slice = output_from_past[:, 0, random_slice_idx]

        # test that outputs are equal for slice
        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6)

    def create_and_check_gpt2_model_attention_mask_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = TFGPT2Model(config=config)

        # create attention mask
        half_seq_length = self.seq_length // 2
        attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
        attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
        attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)

        # first forward pass
        output, past = model(input_ids, attention_mask=attn_mask)

        # create hypothetical next token and extent to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        # change a random masked slice from input_ids
        random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1
        random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size)
        vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change)
        condition = tf.transpose(
            tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size))
        )
        input_ids = tf.where(condition, random_other_next_tokens, input_ids)

        # append to next input_ids and attn_mask
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
        attn_mask = tf.concat([attn_mask, tf.ones((shape_list(attn_mask)[0], 1), dtype=tf.int32)], axis=1)

        # get two different outputs
        output_from_no_past, _ = model(next_input_ids, attention_mask=attn_mask)
        output_from_past, _ = model(next_tokens, past=past, attention_mask=attn_mask)

        # select random slice
        random_slice_idx = int(ids_tensor((1,), shape_list(output_from_past)[-1]))
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
        output_from_past_slice = output_from_past[:, 0, random_slice_idx]

        # test that outputs are equal for slice
        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-12)

    def create_and_check_gpt2_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = TFGPT2LMHeadModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        prediction_scores = model(inputs)[0]
        result = {"prediction_scores": prediction_scores.numpy()}
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size],
        )

    def create_and_check_gpt2_double_head(self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args):
        model = TFGPT2DoubleHeadsModel(config=config)

        multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
        multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
        multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))

        inputs = {
            "input_ids": multiple_choice_inputs_ids,
            "mc_token_ids": mc_token_ids,
            "attention_mask": multiple_choice_input_mask,
            "token_type_ids": multiple_choice_token_type_ids,
        }
        lm_logits, mc_logits = model(inputs)[:2]
        result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()}
        self.parent.assertListEqual(
            list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices])

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids,
         sequence_labels, token_labels, choice_labels,) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict
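`create_and_check_gpt2_model_past` above asserts that feeding just the new token plus the cached `past` state reproduces the last position of a full forward pass over the extended sequence. The same assertion pattern on a toy stateful model, where a running sum stands in for the transformer cache (illustrative only, no TensorFlow needed):

def toy_model(tokens, past=None):
    """Return per-position outputs (running sums) plus a cache of the final state."""
    state = past if past is not None else 0
    outputs = []
    for t in tokens:
        state += t
        outputs.append(state)
    return outputs, state  # (outputs, past)


tokens, next_token = [3, 1, 4, 1, 5], 9

# full forward over the extended sequence
full_outputs, _ = toy_model(tokens + [next_token])

# incremental forward: only the new token plus the cached state
_, past = toy_model(tokens)
step_outputs, _ = toy_model([next_token], past=past)

# the test above asserts the same equality with tf.debugging.assert_near on a random slice
assert step_outputs[0] == full_outputs[-1]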
@require_tf
class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
    all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel) if is_tf_available() else ()
    all_generative_model_classes = (TFGPT2LMHeadModel,) if is_tf_available() else ()

    class TFGPT2ModelTester(object):
        def __init__(
            self, parent, batch_size=13, seq_length=7, is_training=True, use_token_type_ids=True,
            use_input_mask=True, use_labels=True, use_mc_token_ids=True, vocab_size=99, hidden_size=32,
            num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu",
            hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512,
            type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3,
            num_choices=4, scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_token_type_ids = use_token_type_ids
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.use_mc_token_ids = use_mc_token_ids
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
            self.bos_token_id = vocab_size - 1
            self.eos_token_id = vocab_size - 1
        # (prepare_config_and_inputs, create_and_check_gpt2_model, create_and_check_gpt2_model_past,
        # create_and_check_gpt2_model_attention_mask_past, create_and_check_gpt2_lm_head,
        # create_and_check_gpt2_double_head and prepare_config_and_inputs_for_common of this removed
        # inner class are verbatim copies of the hoisted TFGPT2ModelTester above.)
    def setUp(self):
        self.model_tester = TFGPT2ModelTest.TFGPT2ModelTester(self)  # removed line (nested tester)
        self.model_tester = TFGPT2ModelTester(self)  # added line (hoisted tester)
        self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37)

    def test_config(self):
...
tests/test_modeling_tf_openai_gpt.py  View file @ c852036b
...
@@ -33,6 +33,155 @@ if is_tf_available():
)


class TFOpenAIGPTModelTester:
    def __init__(self, parent,):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_token_type_ids = True
        self.use_input_mask = True
        self.use_labels = True
        self.use_mc_token_ids = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        mc_token_ids = None
        if self.use_mc_token_ids:
            mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = OpenAIGPTConfig(
            vocab_size=self.vocab_size, n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers, n_head=self.num_attention_heads,
            # intermediate_size=self.intermediate_size,
            # hidden_act=self.hidden_act,
            # hidden_dropout_prob=self.hidden_dropout_prob,
            # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            n_positions=self.max_position_embeddings,
            n_ctx=self.max_position_embeddings
            # type_vocab_size=self.type_vocab_size,
            # initializer_range=self.initializer_range
        )

        head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

        return (config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids,
                sequence_labels, token_labels, choice_labels)

    def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = TFOpenAIGPTModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        sequence_output = model(inputs)[0]

        inputs = [input_ids, input_mask]
        sequence_output = model(inputs)[0]

        sequence_output = model(input_ids)[0]

        result = {"sequence_output": sequence_output.numpy()}
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )

    def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        model = TFOpenAIGPTLMHeadModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        prediction_scores = model(inputs)[0]
        result = {"prediction_scores": prediction_scores.numpy()}
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )

    def create_and_check_openai_gpt_double_head(self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args):
        model = TFOpenAIGPTDoubleHeadsModel(config=config)

        multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
        multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
        multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))

        inputs = {
            "input_ids": multiple_choice_inputs_ids,
            "mc_token_ids": mc_token_ids,
            "attention_mask": multiple_choice_input_mask,
            "token_type_ids": multiple_choice_token_type_ids,
        }
        lm_logits, mc_logits = model(inputs)[:2]
        result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()}
        self.parent.assertListEqual(
            list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size]
        )
        self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices])

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids,
         sequence_labels, token_labels, choice_labels,) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict
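The double-heads check turns each `[batch, seq]` tensor into a `[batch, num_choices, seq]` tensor by inserting a choice axis with `tf.expand_dims` and repeating it with `tf.tile`. A standalone illustration of that reshaping, assuming only that TensorFlow 2 is installed (the sizes below are arbitrary):

import tensorflow as tf

batch_size, seq_length, num_choices = 2, 5, 4
input_ids = tf.random.uniform((batch_size, seq_length), maxval=99, dtype=tf.int32)

# [batch, seq] -> [batch, 1, seq] -> [batch, num_choices, seq]
multiple_choice_input_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, num_choices, 1))

assert multiple_choice_input_ids.shape == (batch_size, num_choices, seq_length)
# every choice sees the same tokens here; a real multiple-choice batch would differ per choice
assert bool(tf.reduce_all(multiple_choice_input_ids[:, 0, :] == multiple_choice_input_ids[:, 1, :]))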
@require_tf
class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
...
@@ -43,179 +192,8 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
        (TFOpenAIGPTLMHeadModel,) if is_tf_available() else ()
    )
    # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly

    class TFOpenAIGPTModelTester(object):
        def __init__(
            self, parent, batch_size=13, seq_length=7, is_training=True, use_token_type_ids=True,
            use_input_mask=True, use_labels=True, use_mc_token_ids=True, vocab_size=99, hidden_size=32,
            num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, hidden_act="gelu",
            hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512,
            type_vocab_size=16, type_sequence_label_size=2, initializer_range=0.02, num_labels=3,
            num_choices=4, scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_token_type_ids = use_token_type_ids
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.use_mc_token_ids = use_mc_token_ids
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
        # (prepare_config_and_inputs, the create_and_check_openai_gpt_* checks and
        # prepare_config_and_inputs_for_common of this removed inner class are verbatim
        # copies of the hoisted TFOpenAIGPTModelTester above.)
    def setUp(self):
        self.model_tester = TFOpenAIGPTModelTest.TFOpenAIGPTModelTester(self)  # removed line (nested tester)
        self.model_tester = TFOpenAIGPTModelTester(self)  # added line (hoisted tester)
        self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37)

    def test_config(self):
...
tests/test_modeling_tf_roberta.py  View file @ c852036b
...
@@ -36,6 +36,139 @@ if is_tf_available():
)


class TFRobertaModelTester:
    def __init__(self, parent,):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = RobertaConfig(
            vocab_size=self.vocab_size, hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers, num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size, hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings, type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

    def create_and_check_roberta_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = TFRobertaModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        sequence_output = model(inputs)[0]

        inputs = [input_ids, input_mask]
        sequence_output = model(inputs)[0]

        sequence_output = model(input_ids)[0]

        result = {"sequence_output": sequence_output.numpy()}
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )

    def create_and_check_roberta_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = TFRobertaForMaskedLM(config=config)
        prediction_scores = model([input_ids, input_mask, token_type_ids])[0]
        result = {"prediction_scores": prediction_scores.numpy()}
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )

    def create_and_check_roberta_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
        config.num_labels = self.num_labels
        model = TFRobertaForTokenClassification(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        (logits,) = model(inputs)
        result = {"logits": logits.numpy()}
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])

    def create_and_check_roberta_for_question_answering(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = TFRobertaForQuestionAnswering(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        start_logits, end_logits = model(inputs)
        result = {"start_logits": start_logits.numpy(), "end_logits": end_logits.numpy()}
        self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels,) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict
@require_tf
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
...
...
@@ -51,164 +184,8 @@ class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
        else ()
    )
    class TFRobertaModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = RobertaConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def create_and_check_roberta_model(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFRobertaModel(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            sequence_output = model(inputs)[0]

            inputs = [input_ids, input_mask]
            sequence_output = model(inputs)[0]

            sequence_output = model(input_ids)[0]

            result = {
                "sequence_output": sequence_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
            )

        def create_and_check_roberta_for_masked_lm(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFRobertaForMaskedLM(config=config)
            prediction_scores = model([input_ids, input_mask, token_type_ids])[0]
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
            )

        def create_and_check_roberta_for_token_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = TFRobertaForTokenClassification(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            (logits,) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(
                list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
            )

        def create_and_check_roberta_for_question_answering(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFRobertaForQuestionAnswering(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            start_logits, end_logits = model(inputs)
            result = {
                "start_logits": start_logits.numpy(),
                "end_logits": end_logits.numpy(),
            }
            self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
            return config, inputs_dict
    def setUp(self):
        self.model_tester = TFRobertaModelTest.TFRobertaModelTester(self)
        self.model_tester = TFRobertaModelTester(self)
        self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)

    def test_config(self):
...
...
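Note (illustration, not part of this commit's diff): with the tester hoisted to module level, the collapsed test methods above simply build inputs through self.model_tester and forward them to the matching create_and_check_* helper. A minimal sketch of that call pattern, with hypothetical method names:

    def test_roberta_model(self):
        # Build (config, input_ids, ...) via the module-level tester and delegate the checks to it.
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_masked_lm(*config_and_inputs)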
tests/test_modeling_tf_t5.py
View file @ c852036b
...
...
@@ -28,6 +28,186 @@ if is_tf_available():
    from transformers import TFT5Model, TFT5ForConditionalGeneration, T5Tokenizer
class TFT5ModelTester:
    def __init__(
        self, parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_labels = True
        self.vocab_size = 99
        self.n_positions = 14
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.d_ff = 37
        self.relative_attention_num_buckets = 8
        self.dropout_rate = 0.1
        self.initializer_factor = 0.002
        self.eos_token_id = 1
        self.pad_token_id = 0
        self.scope = None

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_labels = None
        if self.use_labels:
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        config = T5Config(
            vocab_size=self.vocab_size,
            n_positions=self.n_positions,
            d_model=self.hidden_size,
            d_ff=self.d_ff,
            d_kv=self.hidden_size // self.num_attention_heads,
            num_layers=self.num_hidden_layers,
            num_heads=self.num_attention_heads,
            relative_attention_num_buckets=self.relative_attention_num_buckets,
            dropout_rate=self.dropout_rate,
            initializer_factor=self.initializer_factor,
            eos_token_id=self.eos_token_id,
            bos_token_id=self.pad_token_id,
            pad_token_id=self.pad_token_id,
        )

        return (config, input_ids, input_mask, token_labels)

    def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
        model = TFT5Model(config=config)
        inputs = {
            "inputs": input_ids,
            "decoder_input_ids": input_ids,
            "decoder_attention_mask": input_mask,
        }
        decoder_output, decoder_past, encoder_output = model(inputs)

        decoder_output, decoder_past, encoder_output = model(
            input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids
        )

        result = {
            "encoder_output": encoder_output.numpy(),
            "decoder_past": decoder_past,
            "decoder_output": decoder_output.numpy(),
        }
        self.parent.assertListEqual(
            list(result["encoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(
            list(result["decoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertEqual(len(decoder_past), 2)
        # decoder_past[0] should correspond to encoder output
        self.parent.assertTrue(tf.reduce_all(tf.math.equal(decoder_past[0][0], encoder_output)))
        # There should be `num_layers` key value embeddings stored in decoder_past[1]
        self.parent.assertEqual(len(decoder_past[1]), config.num_layers)
        # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple
        self.parent.assertEqual(len(decoder_past[1][0]), 4)

    def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
        model = TFT5ForConditionalGeneration(config=config)
        inputs_dict = {
            "inputs": input_ids,
            "decoder_input_ids": input_ids,
            "decoder_attention_mask": input_mask,
        }

        prediction_scores, _, _ = model(inputs_dict)
        result = {
            "prediction_scores": prediction_scores.numpy(),
        }
        self.parent.assertListEqual(
            list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )

    def create_and_check_t5_decoder_model_past(self, config, input_ids, decoder_input_ids, attention_mask):
        model = TFT5Model(config=config).get_decoder()

        input_ids = input_ids[:1, :]
        self.batch_size = 1

        # first forward pass
        _, past_key_value_states = model(input_ids, use_cache=True)

        # create hypothetical next token and extent to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        # append to next input_ids and
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)

        output_from_no_past = model(next_input_ids)[0]
        output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0]

        # select random slice
        random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
        output_from_past_slice = output_from_past[:, 0, random_slice_idx]

        # test that outputs are equal for slice
        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)

    def create_and_check_t5_decoder_model_attention_mask_past(
        self, config, input_ids, decoder_input_ids, attention_mask
    ):
        model = TFT5Model(config=config).get_decoder()

        # create attention mask
        half_seq_length = self.seq_length // 2
        attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
        attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
        attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)

        # first forward pass
        _, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True)

        # create hypothetical next token and extent to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        # change a random masked slice from input_ids
        random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1
        random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size)
        vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change)
        condition = tf.transpose(
            tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size))
        )
        input_ids = tf.where(condition, random_other_next_tokens, input_ids)

        # append to next input_ids and attn_mask
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
        attn_mask = tf.concat(
            [attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)], axis=1,
        )

        # get two different outputs
        output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
        output_from_past = model(
            next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask
        )[0]

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item()
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
        output_from_past_slice = output_from_past[:, 0, random_slice_idx]

        # test that outputs are equal for slice
        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids, input_mask, token_labels) = config_and_inputs
        inputs_dict = {
            "inputs": input_ids,
            "decoder_input_ids": input_ids,
            "decoder_attention_mask": input_mask,
            "use_cache": tf.convert_to_tensor([False]),
        }
        return config, inputs_dict
@require_tf
class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
...
...
@@ -35,207 +215,8 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
    all_model_classes = (TFT5Model, TFT5ForConditionalGeneration) if is_tf_available() else ()
    all_generative_model_classes = (TFT5ForConditionalGeneration,) if is_tf_available() else ()
    class TFT5ModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_labels=True,
            vocab_size=99,
            n_positions=14,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            d_ff=37,
            relative_attention_num_buckets=8,
            dropout_rate=0.1,
            initializer_factor=0.002,
            eos_token_id=1,
            pad_token_id=0,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.n_positions = n_positions
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.d_ff = d_ff
            self.relative_attention_num_buckets = relative_attention_num_buckets
            self.dropout_rate = dropout_rate
            self.initializer_factor = initializer_factor
            self.eos_token_id = eos_token_id
            self.pad_token_id = pad_token_id
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_labels = None
            if self.use_labels:
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            config = T5Config(
                vocab_size=self.vocab_size,
                n_positions=self.n_positions,
                d_model=self.hidden_size,
                d_ff=self.d_ff,
                d_kv=self.hidden_size // self.num_attention_heads,
                num_layers=self.num_hidden_layers,
                num_heads=self.num_attention_heads,
                relative_attention_num_buckets=self.relative_attention_num_buckets,
                dropout_rate=self.dropout_rate,
                initializer_factor=self.initializer_factor,
                eos_token_id=self.eos_token_id,
                bos_token_id=self.pad_token_id,
                pad_token_id=self.pad_token_id,
            )

            return (config, input_ids, input_mask, token_labels)

        def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
            model = TFT5Model(config=config)
            inputs = {
                "inputs": input_ids,
                "decoder_input_ids": input_ids,
                "decoder_attention_mask": input_mask,
            }
            decoder_output, decoder_past, encoder_output = model(inputs)

            decoder_output, decoder_past, encoder_output = model(
                input_ids, decoder_attention_mask=input_mask, decoder_input_ids=input_ids
            )

            result = {
                "encoder_output": encoder_output.numpy(),
                "decoder_past": decoder_past,
                "decoder_output": decoder_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["encoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(
                list(result["decoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertEqual(len(decoder_past), 2)
            # decoder_past[0] should correspond to encoder output
            self.parent.assertTrue(tf.reduce_all(tf.math.equal(decoder_past[0][0], encoder_output)))
            # There should be `num_layers` key value embeddings stored in decoder_past[1]
            self.parent.assertEqual(len(decoder_past[1]), config.num_layers)
            # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple
            self.parent.assertEqual(len(decoder_past[1][0]), 4)

        def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
            model = TFT5ForConditionalGeneration(config=config)
            inputs_dict = {
                "inputs": input_ids,
                "decoder_input_ids": input_ids,
                "decoder_attention_mask": input_mask,
            }

            prediction_scores, _, _ = model(inputs_dict)
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
            )

        def create_and_check_t5_decoder_model_past(self, config, input_ids, decoder_input_ids, attention_mask):
            model = TFT5Model(config=config).get_decoder()

            input_ids = input_ids[:1, :]
            self.batch_size = 1

            # first forward pass
            _, past_key_value_states = model(input_ids, use_cache=True)

            # create hypothetical next token and extent to next_input_ids
            next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

            # append to next input_ids and
            next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)

            output_from_no_past = model(next_input_ids)[0]
            output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0]

            # select random slice
            random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
            output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
            output_from_past_slice = output_from_past[:, 0, random_slice_idx]

            # test that outputs are equal for slice
            tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)

        def create_and_check_t5_decoder_model_attention_mask_past(
            self, config, input_ids, decoder_input_ids, attention_mask
        ):
            model = TFT5Model(config=config).get_decoder()

            # create attention mask
            half_seq_length = self.seq_length // 2
            attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
            attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
            attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)

            # first forward pass
            _, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True)

            # create hypothetical next token and extent to next_input_ids
            next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

            # change a random masked slice from input_ids
            random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1
            random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size)
            vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change)
            condition = tf.transpose(
                tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size))
            )
            input_ids = tf.where(condition, random_other_next_tokens, input_ids)

            # append to next input_ids and attn_mask
            next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
            attn_mask = tf.concat(
                [attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)], axis=1,
            )

            # get two different outputs
            output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
            output_from_past = model(
                next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask
            )[0]

            # select random slice
            random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item()
            output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
            output_from_past_slice = output_from_past[:, 0, random_slice_idx]

            # test that outputs are equal for slice
            tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids, input_mask, token_labels) = config_and_inputs
            inputs_dict = {
                "inputs": input_ids,
                "decoder_input_ids": input_ids,
                "decoder_attention_mask": input_mask,
                "use_cache": tf.convert_to_tensor([False]),
            }
            return config, inputs_dict
    def setUp(self):
        self.model_tester = TFT5ModelTest.TFT5ModelTester(self)
        self.model_tester = TFT5ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)

    def test_config(self):
...
...
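Aside (illustration only, not from the diff): both decoder-cache checks in TFT5ModelTester rest on the same invariant, that running the full sequence and running only the new token with past_key_value_states must agree at the new position. A toy TensorFlow snippet showing just the slice comparison that tf.debugging.assert_near performs, with stand-in tensors:

    import tensorflow as tf

    # Stand-in logits for a full-sequence run: (batch=1, seq_len=8, features=32).
    output_from_no_past = tf.random.uniform((1, 8, 32))
    # A correct cache makes the single-step run reproduce the last position exactly.
    output_from_past = output_from_no_past[:, -1:, :]

    random_slice_idx = 5  # any feature index works for this spot check
    tf.debugging.assert_near(
        output_from_past[:, 0, random_slice_idx],
        output_from_no_past[:, -1, random_slice_idx],
        rtol=1e-3,
    )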
tests/test_modeling_tf_transfo_xl.py
View file @ c852036b
...
...
@@ -33,6 +33,135 @@
    )
class TFTransfoXLModelTester:
    def __init__(
        self, parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.mem_len = 30
        self.key_length = self.seq_length + self.mem_len
        self.clamp_len = 15
        self.is_training = True
        self.use_labels = True
        self.vocab_size = 99
        self.cutoffs = [10, 50, 80]
        self.hidden_size = 32
        self.d_embed = 32
        self.num_attention_heads = 4
        self.d_head = 8
        self.d_inner = 128
        self.div_val = 2
        self.num_hidden_layers = 5
        self.scope = None
        self.seed = 1
        self.eos_token_id = 0

    def prepare_config_and_inputs(self):
        input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        lm_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        config = TransfoXLConfig(
            vocab_size=self.vocab_size,
            mem_len=self.mem_len,
            clamp_len=self.clamp_len,
            cutoffs=self.cutoffs,
            d_model=self.hidden_size,
            d_embed=self.d_embed,
            n_head=self.num_attention_heads,
            d_head=self.d_head,
            d_inner=self.d_inner,
            div_val=self.div_val,
            n_layer=self.num_hidden_layers,
            eos_token_id=self.eos_token_id,
        )

        return (config, input_ids_1, input_ids_2, lm_labels)

    def set_seed(self):
        random.seed(self.seed)
        tf.random.set_seed(self.seed)

    def create_and_check_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
        model = TFTransfoXLModel(config)

        hidden_states_1, mems_1 = model(input_ids_1)

        inputs = {"input_ids": input_ids_2, "mems": mems_1}

        hidden_states_2, mems_2 = model(inputs)

        result = {
            "hidden_states_1": hidden_states_1.numpy(),
            "mems_1": [mem.numpy() for mem in mems_1],
            "hidden_states_2": hidden_states_2.numpy(),
            "mems_2": [mem.numpy() for mem in mems_2],
        }

        self.parent.assertListEqual(
            list(result["hidden_states_1"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(
            list(result["hidden_states_2"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_1"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_and_check_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels):
        model = TFTransfoXLLMHeadModel(config)

        lm_logits_1, mems_1 = model(input_ids_1)

        inputs = {"input_ids": input_ids_1, "labels": lm_labels}
        _, mems_1 = model(inputs)

        lm_logits_2, mems_2 = model([input_ids_2, mems_1])

        inputs = {"input_ids": input_ids_1, "mems": mems_1, "labels": lm_labels}

        _, mems_2 = model(inputs)

        result = {
            "mems_1": [mem.numpy() for mem in mems_1],
            "lm_logits_1": lm_logits_1.numpy(),
            "mems_2": [mem.numpy() for mem in mems_2],
            "lm_logits_2": lm_logits_2.numpy(),
        }

        self.parent.assertListEqual(
            list(result["lm_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_1"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
        self.parent.assertListEqual(
            list(result["lm_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
        inputs_dict = {"input_ids": input_ids_1}
        return config, inputs_dict
@require_tf
class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
...
...
@@ -43,155 +172,8 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
    test_torchscript = False
    test_resize_embeddings = False
    class TFTransfoXLModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            mem_len=30,
            clamp_len=15,
            is_training=True,
            use_labels=True,
            vocab_size=99,
            cutoffs=[10, 50, 80],
            hidden_size=32,
            d_embed=32,
            num_attention_heads=4,
            d_head=8,
            d_inner=128,
            div_val=2,
            num_hidden_layers=5,
            scope=None,
            seed=1,
            eos_token_id=0,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.mem_len = mem_len
            self.key_length = seq_length + mem_len
            self.clamp_len = clamp_len
            self.is_training = is_training
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.cutoffs = cutoffs
            self.hidden_size = hidden_size
            self.d_embed = d_embed
            self.num_attention_heads = num_attention_heads
            self.d_head = d_head
            self.d_inner = d_inner
            self.div_val = div_val
            self.num_hidden_layers = num_hidden_layers
            self.scope = scope
            self.seed = seed
            self.eos_token_id = eos_token_id

        def prepare_config_and_inputs(self):
            input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            lm_labels = None
            if self.use_labels:
                lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            config = TransfoXLConfig(
                vocab_size=self.vocab_size,
                mem_len=self.mem_len,
                clamp_len=self.clamp_len,
                cutoffs=self.cutoffs,
                d_model=self.hidden_size,
                d_embed=self.d_embed,
                n_head=self.num_attention_heads,
                d_head=self.d_head,
                d_inner=self.d_inner,
                div_val=self.div_val,
                n_layer=self.num_hidden_layers,
                eos_token_id=self.eos_token_id,
            )

            return (config, input_ids_1, input_ids_2, lm_labels)

        def set_seed(self):
            random.seed(self.seed)
            tf.random.set_seed(self.seed)

        def create_and_check_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
            model = TFTransfoXLModel(config)

            hidden_states_1, mems_1 = model(input_ids_1)

            inputs = {"input_ids": input_ids_2, "mems": mems_1}

            hidden_states_2, mems_2 = model(inputs)

            result = {
                "hidden_states_1": hidden_states_1.numpy(),
                "mems_1": [mem.numpy() for mem in mems_1],
                "hidden_states_2": hidden_states_2.numpy(),
                "mems_2": [mem.numpy() for mem in mems_2],
            }

            self.parent.assertListEqual(
                list(result["hidden_states_1"].shape), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(
                list(result["hidden_states_2"].shape), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(
                list(list(mem.shape) for mem in result["mems_1"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )
            self.parent.assertListEqual(
                list(list(mem.shape) for mem in result["mems_2"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def create_and_check_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels):
            model = TFTransfoXLLMHeadModel(config)

            lm_logits_1, mems_1 = model(input_ids_1)

            inputs = {"input_ids": input_ids_1, "labels": lm_labels}
            _, mems_1 = model(inputs)

            lm_logits_2, mems_2 = model([input_ids_2, mems_1])

            inputs = {"input_ids": input_ids_1, "mems": mems_1, "labels": lm_labels}

            _, mems_2 = model(inputs)

            result = {
                "mems_1": [mem.numpy() for mem in mems_1],
                "lm_logits_1": lm_logits_1.numpy(),
                "mems_2": [mem.numpy() for mem in mems_2],
                "lm_logits_2": lm_logits_2.numpy(),
            }

            self.parent.assertListEqual(
                list(result["lm_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(
                list(list(mem.shape) for mem in result["mems_1"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )
            self.parent.assertListEqual(
                list(result["lm_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(
                list(list(mem.shape) for mem in result["mems_2"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
            )

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
            inputs_dict = {"input_ids": input_ids_1}
            return config, inputs_dict
    def setUp(self):
        self.model_tester = TFTransfoXLModelTest.TFTransfoXLModelTester(self)
        self.model_tester = TFTransfoXLModelTester(self)
        self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)

    def test_config(self):
...
...
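Aside (illustration only): the mems assertions in TFTransfoXLModelTester expect one memory tensor per layer with shape [mem_len, batch_size, hidden_size]. Using the defaults defined above (mem_len=30, batch_size=13, hidden_size=32, 5 layers), the expected layout can be checked in isolation:

    import tensorflow as tf

    mem_len, batch_size, hidden_size, n_layer = 30, 13, 32, 5

    # Dummy memories laid out the way the assertions expect: one tensor per layer.
    mems = [tf.zeros((mem_len, batch_size, hidden_size)) for _ in range(n_layer)]

    assert [list(mem.shape) for mem in mems] == [[mem_len, batch_size, hidden_size]] * n_layer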