chenpangpang/transformers · commit c852036b (unverified)

[cleanup] Hoist ModelTester objects to top level (#4939)

Authored Jun 16, 2020 by Amil Khare; committed via GitHub, Jun 16, 2020
Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
Parent: 0c55a384

Showing 5 changed files with 1297 additions and 1410 deletions (+1297, −1410):
tests/test_modeling_tf_xlm.py        +198  −229
tests/test_modeling_tf_xlnet.py      +275  −298
tests/test_modeling_transfo_xl.py    +132  −150
tests/test_modeling_xlm.py           +294  −331
tests/test_modeling_xlnet.py         +398  −402
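Every file in this commit applies the same mechanical refactor: a ModelTester helper that used to be nested inside its unittest.TestCase, configured through a long list of keyword arguments with defaults, becomes a top-level class with those defaults hard-coded, and setUp constructs it directly rather than through the enclosing test class. A minimal runnable sketch of the before/after pattern, using a hypothetical FooModelTester rather than code from this diff:

import unittest


# Before: the tester is nested, so tests must reach it through the test case,
# and every default lives in the keyword list.
class FooModelTest(unittest.TestCase):
    class FooModelTester(object):
        def __init__(self, parent, batch_size=13, seq_length=7):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length

    def setUp(self):
        self.model_tester = FooModelTest.FooModelTester(self)


# After: the tester is top-level (importable and reusable elsewhere), and the
# defaults are plain attribute assignments.
class FooModelTester:
    def __init__(self, parent):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7


class FooModelTestHoisted(unittest.TestCase):
    def setUp(self):
        self.model_tester = FooModelTester(self)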
tests/test_modeling_tf_xlm.py    View file @ c852036b

@@ -35,137 +35,83 @@ if is_tf_available():
    )

Removed: the test case header and the tester nested inside it:

@require_tf
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
        if is_tf_available()
        else ()
    )
    all_generative_model_classes = (
        (TFXLMWithLMHeadModel,) if is_tf_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable

    class TFXLMModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_lengths=True,
            use_token_type_ids=True,
            use_labels=True,
            gelu_activation=True,
            sinusoidal_embeddings=False,
            causal=False,
            asm=False,
            n_langs=2,
            vocab_size=99,
            n_special=0,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            summary_type="last",
            use_proj=True,
            scope=None,
            bos_token_id=0,
        ):
            self.parent = parent
            self.batch_size = batch_size
            ...  # and so on: each keyword argument assigned to the attribute of the same name

Added: the tester at top level, with the same defaults hard-coded:

class TFXLMModelTester:
    def __init__(
        self,
        parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_lengths = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.gelu_activation = True
        self.sinusoidal_embeddings = False
        self.causal = False
        self.asm = False
        self.n_langs = 2
        self.vocab_size = 99
        self.n_special = 0
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.summary_type = "last"
        self.use_proj = True
        self.scope = None
        self.bos_token_id = 0

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)

        input_lengths = None
        if self.use_input_lengths:
            input_lengths = (
                ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
            )  # small variation of seq_length

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)

        sequence_labels = None
        token_labels = None
        is_impossible_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)

        config = XLMConfig(
            vocab_size=self.vocab_size,
            n_special=self.n_special,
            emb_dim=self.hidden_size,
            n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            gelu_activation=self.gelu_activation,
            sinusoidal_embeddings=self.sinusoidal_embeddings,
            asm=self.asm,
            causal=self.causal,
            n_langs=self.n_langs,
            max_position_embeddings=self.max_position_embeddings,
            initializer_range=self.initializer_range,
            summary_type=self.summary_type,
            use_proj=self.use_proj,
            bos_token_id=self.bos_token_id,
        )

        return (
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        )

@@ -174,23 +120,108 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):

The create_and_check_* helpers move with the tester, their bodies unchanged apart from one indentation level:

    def create_and_check_xlm_model(
        self, config, input_ids, token_type_ids, input_lengths, sequence_labels,
        token_labels, is_impossible_labels, input_mask,
    ):
        model = TFXLMModel(config=config)
        inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
        outputs = model(inputs)

        inputs = [input_ids, input_mask]
        outputs = model(inputs)
        sequence_output = outputs[0]
        result = {
            "sequence_output": sequence_output.numpy(),
        }
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )

    def create_and_check_xlm_lm_head(
        self, config, input_ids, token_type_ids, input_lengths, sequence_labels,
        token_labels, is_impossible_labels, input_mask,
    ):
        model = TFXLMWithLMHeadModel(config)
        inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
        outputs = model(inputs)
        logits = outputs[0]
        result = {
            "logits": logits.numpy(),
        }
        self.parent.assertListEqual(
            list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])

    def create_and_check_xlm_qa(
        self, config, input_ids, token_type_ids, input_lengths, sequence_labels,
        token_labels, is_impossible_labels, input_mask,
    ):
        model = TFXLMForQuestionAnsweringSimple(config)
        inputs = {"input_ids": input_ids, "lengths": input_lengths}
        start_logits, end_logits = model(inputs)
        result = {
            "start_logits": start_logits.numpy(),
            "end_logits": end_logits.numpy(),
        }
        self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])

    def create_and_check_xlm_sequence_classif(
        self, config, input_ids, token_type_ids, input_lengths, sequence_labels,
        token_labels, is_impossible_labels, input_mask,
    ):
        model = TFXLMForSequenceClassification(config)
        inputs = {"input_ids": input_ids, "lengths": input_lengths}
        (logits,) = model(inputs)
        result = {
            "logits": logits.numpy(),
        }
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        ) = config_and_inputs
        inputs_dict = {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "langs": token_type_ids,
            "lengths": input_lengths,
        }
        return config, inputs_dict

@@ -199,92 +230,30 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):

The test case now follows the tester and instantiates it directly:

@require_tf
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
        if is_tf_available()
        else ()
    )
    all_generative_model_classes = (
        (TFXLMWithLMHeadModel,) if is_tf_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable

    def setUp(self):
-       self.model_tester = TFXLMModelTest.TFXLMModelTester(self)
+       self.model_tester = TFXLMModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)

    def test_config(self):
...
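All of the prepare_config_and_inputs methods in these testers fabricate their inputs with the shared test helper ids_tensor. As a rough sketch of what such a helper does, inferred only from its call sites in this diff (a shape list, an exclusive upper bound, and an optional dtype override), not from the library's actual implementation:

import tensorflow as tf


def ids_tensor(shape, vocab_size, dtype=tf.int32):
    # Sketch: uniform random ids in [0, vocab_size), cast to the requested dtype.
    ids = tf.random.uniform(shape, minval=0, maxval=vocab_size, dtype=tf.int32)
    return tf.cast(ids, dtype)


batch_size, seq_length, vocab_size = 13, 7, 99
input_ids = ids_tensor([batch_size, seq_length], vocab_size)            # token ids
input_mask = ids_tensor([batch_size, seq_length], 2, dtype=tf.float32)  # random 0/1 mask

This also explains the ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2 idiom: each length comes out as seq_length - 2 or seq_length - 1, the "small variation of seq_length" the comment promises.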
tests/test_modeling_tf_xlnet.py    View file @ c852036b

@@ -37,142 +37,80 @@ if is_tf_available():
    )

Removed: the @require_tf class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase) header together with its all_model_classes, all_generative_model_classes, and test_pruning = False attributes, and the nested class TFXLNetModelTester(object) whose __init__ exposed the same settings as keyword defaults (batch_size=13, seq_length=7, mem_len=10, clamp_len=-1, reuse_len=15, is_training=True, use_labels=True, vocab_size=99, cutoffs=[10, 50, 80], hidden_size=32, num_attention_heads=4, d_inner=128, num_hidden_layers=5, type_sequence_label_size=2, untie_r=True, bi_data=False, same_length=False, initializer_range=0.05, seed=1, type_vocab_size=2, bos_token_id=1, eos_token_id=2, pad_token_id=5) and copied each onto self.

Added: the tester at top level, defaults hard-coded:

class TFXLNetModelTester:
    def __init__(
        self,
        parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.mem_len = 10
        # self.key_len = seq_length + mem_len
        self.clamp_len = -1
        self.reuse_len = 15
        self.is_training = True
        self.use_labels = True
        self.vocab_size = 99
        self.cutoffs = [10, 50, 80]
        self.hidden_size = 32
        self.num_attention_heads = 4
        self.d_inner = 128
        self.num_hidden_layers = 5
        self.type_sequence_label_size = 2
        self.untie_r = True
        self.bi_data = False
        self.same_length = False
        self.initializer_range = 0.05
        self.seed = 1
        self.type_vocab_size = 2
        self.bos_token_id = 1
        self.eos_token_id = 2
        self.pad_token_id = 5

    def prepare_config_and_inputs(self):
        input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)

        input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
        perm_mask = tf.zeros((self.batch_size, self.seq_length + 1, self.seq_length), dtype=tf.float32)
        perm_mask_last = tf.ones((self.batch_size, self.seq_length + 1, 1), dtype=tf.float32)
        perm_mask = tf.concat([perm_mask, perm_mask_last], axis=-1)
        # perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
        target_mapping = tf.zeros((self.batch_size, 1, self.seq_length), dtype=tf.float32)
        target_mapping_last = tf.ones((self.batch_size, 1, 1), dtype=tf.float32)
        target_mapping = tf.concat([target_mapping, target_mapping_last], axis=-1)
        # target_mapping[:, 0, -1] = 1.0  # predict last token

        sequence_labels = None
        lm_labels = None
        is_impossible_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)

        config = XLNetConfig(
            vocab_size=self.vocab_size,
            d_model=self.hidden_size,
            n_head=self.num_attention_heads,
            d_inner=self.d_inner,
            n_layer=self.num_hidden_layers,
            untie_r=self.untie_r,
            mem_len=self.mem_len,
            clamp_len=self.clamp_len,
            same_length=self.same_length,
            reuse_len=self.reuse_len,
            bi_data=self.bi_data,
            initializer_range=self.initializer_range,
            num_labels=self.type_sequence_label_size,
            bos_token_id=self.bos_token_id,
            pad_token_id=self.pad_token_id,
            eos_token_id=self.eos_token_id,
        )

        return (
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
        )

    def set_seed(self):
        random.seed(self.seed)
        tf.random.set_seed(self.seed)

@@ -184,120 +122,203 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):

The checker methods also move across unchanged. For example:

    def create_and_check_xlnet_base_model(
        self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
        target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels,
    ):
        model = TFXLNetModel(config)

        inputs = {"input_ids": input_ids_1, "input_mask": input_mask, "token_type_ids": segment_ids}
        _, _ = model(inputs)

        inputs = [input_ids_1, input_mask]
        outputs, mems_1 = model(inputs)

        result = {
            "mems_1": [mem.numpy() for mem in mems_1],
            "outputs": outputs.numpy(),
        }

        config.mem_len = 0
        model = TFXLNetModel(config)
        no_mems_outputs = model(inputs)
        self.parent.assertEqual(len(no_mems_outputs), 1)

        self.parent.assertListEqual(
            list(result["outputs"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_lm_head(
        self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
        target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels,
    ):
        model = TFXLNetLMHeadModel(config)

        inputs_1 = {"input_ids": input_ids_1, "token_type_ids": segment_ids}
        all_logits_1, mems_1 = model(inputs_1)

        inputs_2 = {"input_ids": input_ids_2, "mems": mems_1, "token_type_ids": segment_ids}
        all_logits_2, mems_2 = model(inputs_2)

        inputs_3 = {"input_ids": input_ids_q, "perm_mask": perm_mask, "target_mapping": target_mapping}
        logits, _ = model(inputs_3)

        result = {
            "mems_1": [mem.numpy() for mem in mems_1],
            "all_logits_1": all_logits_1.numpy(),
            "mems_2": [mem.numpy() for mem in mems_2],
            "all_logits_2": all_logits_2.numpy(),
        }

        self.parent.assertListEqual(
            list(result["all_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
        self.parent.assertListEqual(
            list(result["all_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

create_and_check_xlnet_qa checks start_logits / end_logits of shape [self.batch_size, self.seq_length] plus the per-layer mems; create_and_check_xlnet_sequence_classif checks logits of shape [self.batch_size, self.type_sequence_label_size]; create_and_check_xlnet_for_token_classification sets config.num_labels = input_ids_1.shape[1] and checks logits of shape [self.batch_size, self.seq_length, config.num_labels].

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids_1}
        return config, inputs_dict

@@ -309,76 +330,32 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):

@require_tf
class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (
            TFXLNetModel,
            TFXLNetLMHeadModel,
            TFXLNetForSequenceClassification,
            TFXLNetForTokenClassification,
            TFXLNetForQuestionAnsweringSimple,
        )
        if is_tf_available()
        else ()
    )
    all_generative_model_classes = (
        (TFXLNetLMHeadModel,) if is_tf_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable
    test_pruning = False

    def setUp(self):
-       self.model_tester = TFXLNetModelTest.TFXLNetModelTester(self)
+       self.model_tester = TFXLNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)

    def test_config(self):
...
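Worth pausing on the perm_mask / target_mapping construction in prepare_config_and_inputs above: tf.Tensor does not support item assignment, so the commented-out perm_mask[:, :, -1] = 1.0 is emulated by concatenating a final column of ones. A standalone sketch with small dimensions; the attention-semantics comments follow the convention implied by the test's own comments:

import tensorflow as tf

batch_size, seq_length = 2, 4

# perm_mask: start fully visible (zeros), then append a column of ones so that
# previous tokens don't see the last token.
perm_mask = tf.zeros((batch_size, seq_length + 1, seq_length), dtype=tf.float32)
perm_mask_last = tf.ones((batch_size, seq_length + 1, 1), dtype=tf.float32)
perm_mask = tf.concat([perm_mask, perm_mask_last], axis=-1)  # == perm_mask[:, :, -1] = 1.0

# target_mapping: a single prediction target, pointing at the last position.
target_mapping = tf.zeros((batch_size, 1, seq_length), dtype=tf.float32)
target_mapping_last = tf.ones((batch_size, 1, 1), dtype=tf.float32)
target_mapping = tf.concat([target_mapping, target_mapping_last], axis=-1)  # predict last token

print(perm_mask.shape)       # (2, 5, 5)
print(target_mapping.shape)  # (2, 1, 5)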
tests/test_modeling_transfo_xl.py    View file @ c852036b

@@ -29,6 +29,137 @@ if is_torch_available():
    from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST

Added: the tester at top level, ahead of the test case:

class TransfoXLModelTester:
    def __init__(
        self,
        parent,
    ):
        self.parent = parent
        self.batch_size = 14
        self.seq_length = 7
        self.mem_len = 30
        self.key_length = self.seq_length + self.mem_len
        self.clamp_len = 15
        self.is_training = True
        self.use_labels = True
        self.vocab_size = 99
        self.cutoffs = [10, 50, 80]
        self.hidden_size = 32
        self.d_embed = 32
        self.num_attention_heads = 4
        self.d_head = 8
        self.d_inner = 128
        self.div_val = 2
        self.num_hidden_layers = 5
        self.scope = None
        self.seed = 1
        self.eos_token_id = 0

    def prepare_config_and_inputs(self):
        input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        lm_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        config = TransfoXLConfig(
            vocab_size=self.vocab_size,
            mem_len=self.mem_len,
            clamp_len=self.clamp_len,
            cutoffs=self.cutoffs,
            d_model=self.hidden_size,
            d_embed=self.d_embed,
            n_head=self.num_attention_heads,
            d_head=self.d_head,
            d_inner=self.d_inner,
            div_val=self.div_val,
            n_layer=self.num_hidden_layers,
            eos_token_id=self.eos_token_id,
        )
        return (config, input_ids_1, input_ids_2, lm_labels)

    def set_seed(self):
        random.seed(self.seed)
        torch.manual_seed(self.seed)

    def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
        model = TransfoXLModel(config)
        model.to(torch_device)
        model.eval()

        hidden_states_1, mems_1 = model(input_ids_1)
        hidden_states_2, mems_2 = model(input_ids_2, mems_1)
        outputs = {
            "hidden_states_1": hidden_states_1,
            "mems_1": mems_1,
            "hidden_states_2": hidden_states_2,
            "mems_2": mems_2,
        }
        return outputs

    def check_transfo_xl_model_output(self, result):
        self.parent.assertListEqual(
            list(result["hidden_states_1"].size()), [self.batch_size, self.seq_length, self.hidden_size],
        )
        self.parent.assertListEqual(
            list(result["hidden_states_2"].size()), [self.batch_size, self.seq_length, self.hidden_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels):
        model = TransfoXLLMHeadModel(config)
        model.to(torch_device)
        model.eval()

        lm_logits_1, mems_1 = model(input_ids_1)
        loss_1, _, mems_1 = model(input_ids_1, labels=lm_labels)
        lm_logits_2, mems_2 = model(input_ids_2, mems=mems_1)
        loss_2, _, mems_2 = model(input_ids_2, labels=lm_labels, mems=mems_1)

        outputs = {
            "loss_1": loss_1,
            "mems_1": mems_1,
            "lm_logits_1": lm_logits_1,
            "loss_2": loss_2,
            "mems_2": mems_2,
            "lm_logits_2": lm_logits_2,
        }
        return outputs

    def check_transfo_xl_lm_head_output(self, result):
        self.parent.assertListEqual(list(result["loss_1"].size()), [self.batch_size, self.seq_length - 1])
        self.parent.assertListEqual(
            list(result["lm_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
        self.parent.assertListEqual(list(result["loss_2"].size()), [self.batch_size, self.seq_length - 1])
        self.parent.assertListEqual(
            list(result["lm_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
        inputs_dict = {"input_ids": input_ids_1}
        return config, inputs_dict

@require_torch
class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):

@@ -38,155 +169,6 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
    test_torchscript = False
    test_resize_embeddings = True

Removed: the identical tester previously nested here as class TransfoXLModelTester(object), with the same values exposed as keyword defaults (batch_size=14, seq_length=7, mem_len=30, clamp_len=15, is_training=True, use_labels=True, vocab_size=99, cutoffs=[10, 50, 80], hidden_size=32, d_embed=32, num_attention_heads=4, d_head=8, d_inner=128, div_val=2, num_hidden_layers=5, scope=None, seed=1, eos_token_id=0).

    def check_cutoffs_and_n_token(
        self, copied_cutoffs, layer, model_embed, model, model_class, resized_value, vocab_size
    ):

@@ -210,7 +192,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
        self.assertEqual(model.crit.n_token, vocab_size + resized_value)

    def setUp(self):
-       self.model_tester = TransfoXLModelTest.TransfoXLModelTester(self)
+       self.model_tester = TransfoXLModelTester(self)
        self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)

    def test_config(self):
...
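The two chained forward passes in create_transfo_xl_model are the point of this test: Transfo-XL returns per-layer memories that are fed back in for the next segment. A condensed sketch of that call pattern, assuming the 2020-era tuple-returning API this test targets (current transformers versions return output objects instead and may need return_dict=False):

import torch
from transformers import TransfoXLConfig, TransfoXLModel

# Same small configuration the tester builds.
config = TransfoXLConfig(
    vocab_size=99, mem_len=30, clamp_len=15, cutoffs=[10, 50, 80],
    d_model=32, d_embed=32, n_head=4, d_head=8, d_inner=128, div_val=2, n_layer=5,
)
model = TransfoXLModel(config)
model.eval()

segment_1 = torch.randint(0, config.vocab_size, (14, 7))
segment_2 = torch.randint(0, config.vocab_size, (14, 7))

with torch.no_grad():
    hidden_1, mems_1 = model(segment_1)          # first segment, fresh memory
    hidden_2, mems_2 = model(segment_2, mems_1)  # second segment attends to mems_1

# One [mem_len, batch_size, hidden_size] tensor per layer, which is exactly
# what check_transfo_xl_model_output asserts.
print(len(mems_1), list(mems_1[0].size()))  # 5 [30, 14, 32]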
tests/test_modeling_xlm.py
View file @
c852036b
...
@@ -37,146 +37,82 @@ if is_torch_available():
...
@@ -37,146 +37,82 @@ if is_torch_available():
from
transformers.modeling_xlm
import
XLM_PRETRAINED_MODEL_ARCHIVE_LIST
from
transformers.modeling_xlm
import
XLM_PRETRAINED_MODEL_ARCHIVE_LIST
@
require_torch
class
XLMModelTester
:
class
XLMModelTest
(
ModelTesterMixin
,
unittest
.
TestCase
):
def
__init__
(
self
,
parent
,
all_model_classes
=
(
):
(
self
.
parent
=
parent
XLMModel
,
self
.
batch_size
=
13
XLMWithLMHeadModel
,
self
.
seq_length
=
7
XLMForQuestionAnswering
,
self
.
is_training
=
True
XLMForSequenceClassification
,
self
.
use_input_lengths
=
True
XLMForQuestionAnsweringSimple
,
self
.
use_token_type_ids
=
True
self
.
use_labels
=
True
self
.
gelu_activation
=
True
self
.
sinusoidal_embeddings
=
False
self
.
causal
=
False
self
.
asm
=
False
self
.
n_langs
=
2
self
.
vocab_size
=
99
self
.
n_special
=
0
self
.
hidden_size
=
32
self
.
num_hidden_layers
=
5
self
.
num_attention_heads
=
4
self
.
hidden_dropout_prob
=
0.1
self
.
attention_probs_dropout_prob
=
0.1
self
.
max_position_embeddings
=
512
self
.
type_sequence_label_size
=
2
self
.
initializer_range
=
0.02
self
.
num_labels
=
3
self
.
num_choices
=
4
self
.
summary_type
=
"last"
self
.
use_proj
=
True
self
.
scope
=
None
self
.
bos_token_id
=
0
def
prepare_config_and_inputs
(
self
):
input_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
vocab_size
)
input_mask
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
2
).
float
()
input_lengths
=
None
if
self
.
use_input_lengths
:
input_lengths
=
(
ids_tensor
([
self
.
batch_size
],
vocab_size
=
2
)
+
self
.
seq_length
-
2
)
# small variation of seq_length
token_type_ids
=
None
if
self
.
use_token_type_ids
:
token_type_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
n_langs
)
sequence_labels
=
None
token_labels
=
None
is_impossible_labels
=
None
if
self
.
use_labels
:
sequence_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
type_sequence_label_size
)
token_labels
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
num_labels
)
is_impossible_labels
=
ids_tensor
([
self
.
batch_size
],
2
).
float
()
config
=
XLMConfig
(
vocab_size
=
self
.
vocab_size
,
n_special
=
self
.
n_special
,
emb_dim
=
self
.
hidden_size
,
n_layers
=
self
.
num_hidden_layers
,
n_heads
=
self
.
num_attention_heads
,
dropout
=
self
.
hidden_dropout_prob
,
attention_dropout
=
self
.
attention_probs_dropout_prob
,
gelu_activation
=
self
.
gelu_activation
,
sinusoidal_embeddings
=
self
.
sinusoidal_embeddings
,
asm
=
self
.
asm
,
causal
=
self
.
causal
,
n_langs
=
self
.
n_langs
,
max_position_embeddings
=
self
.
max_position_embeddings
,
initializer_range
=
self
.
initializer_range
,
summary_type
=
self
.
summary_type
,
use_proj
=
self
.
use_proj
,
bos_token_id
=
self
.
bos_token_id
,
)
)
if
is_torch_available
()
else
()
)
all_generative_model_classes
=
(
(
XLMWithLMHeadModel
,)
if
is_torch_available
()
else
()
)
# TODO (PVP): Check other models whether language generation is also applicable
class
XLMModelTester
(
object
):
return
(
def
__init__
(
self
,
parent
,
batch_size
=
13
,
seq_length
=
7
,
is_training
=
True
,
use_input_lengths
=
True
,
use_token_type_ids
=
True
,
use_labels
=
True
,
gelu_activation
=
True
,
sinusoidal_embeddings
=
False
,
causal
=
False
,
asm
=
False
,
n_langs
=
2
,
vocab_size
=
99
,
n_special
=
0
,
hidden_size
=
32
,
num_hidden_layers
=
5
,
num_attention_heads
=
4
,
hidden_dropout_prob
=
0.1
,
attention_probs_dropout_prob
=
0.1
,
max_position_embeddings
=
512
,
type_vocab_size
=
16
,
type_sequence_label_size
=
2
,
initializer_range
=
0.02
,
num_labels
=
3
,
num_choices
=
4
,
summary_type
=
"last"
,
use_proj
=
True
,
scope
=
None
,
bos_token_id
=
0
,
):
self
.
parent
=
parent
self
.
batch_size
=
batch_size
self
.
seq_length
=
seq_length
self
.
is_training
=
is_training
self
.
use_input_lengths
=
use_input_lengths
self
.
use_token_type_ids
=
use_token_type_ids
self
.
use_labels
=
use_labels
self
.
gelu_activation
=
gelu_activation
self
.
sinusoidal_embeddings
=
sinusoidal_embeddings
self
.
asm
=
asm
self
.
n_langs
=
n_langs
self
.
vocab_size
=
vocab_size
self
.
n_special
=
n_special
self
.
summary_type
=
summary_type
self
.
causal
=
causal
self
.
use_proj
=
use_proj
self
.
hidden_size
=
hidden_size
self
.
num_hidden_layers
=
num_hidden_layers
self
.
num_attention_heads
=
num_attention_heads
self
.
hidden_dropout_prob
=
hidden_dropout_prob
self
.
attention_probs_dropout_prob
=
attention_probs_dropout_prob
self
.
max_position_embeddings
=
max_position_embeddings
self
.
n_langs
=
n_langs
self
.
type_sequence_label_size
=
type_sequence_label_size
self
.
initializer_range
=
initializer_range
self
.
summary_type
=
summary_type
self
.
num_labels
=
num_labels
self
.
num_choices
=
num_choices
self
.
scope
=
scope
self
.
bos_token_id
=
bos_token_id
def
prepare_config_and_inputs
(
self
):
input_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
vocab_size
)
input_mask
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
2
).
float
()
input_lengths
=
None
if
self
.
use_input_lengths
:
input_lengths
=
(
ids_tensor
([
self
.
batch_size
],
vocab_size
=
2
)
+
self
.
seq_length
-
2
)
# small variation of seq_length
token_type_ids
=
None
if
self
.
use_token_type_ids
:
token_type_ids
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
n_langs
)
sequence_labels
=
None
token_labels
=
None
is_impossible_labels
=
None
if
self
.
use_labels
:
sequence_labels
=
ids_tensor
([
self
.
batch_size
],
self
.
type_sequence_label_size
)
token_labels
=
ids_tensor
([
self
.
batch_size
,
self
.
seq_length
],
self
.
num_labels
)
is_impossible_labels
=
ids_tensor
([
self
.
batch_size
],
2
).
float
()
config
=
XLMConfig
(
vocab_size
=
self
.
vocab_size
,
n_special
=
self
.
n_special
,
emb_dim
=
self
.
hidden_size
,
n_layers
=
self
.
num_hidden_layers
,
n_heads
=
self
.
num_attention_heads
,
dropout
=
self
.
hidden_dropout_prob
,
attention_dropout
=
self
.
attention_probs_dropout_prob
,
gelu_activation
=
self
.
gelu_activation
,
sinusoidal_embeddings
=
self
.
sinusoidal_embeddings
,
asm
=
self
.
asm
,
causal
=
self
.
causal
,
n_langs
=
self
.
n_langs
,
max_position_embeddings
=
self
.
max_position_embeddings
,
initializer_range
=
self
.
initializer_range
,
summary_type
=
self
.
summary_type
,
use_proj
=
self
.
use_proj
,
bos_token_id
=
self
.
bos_token_id
,
)
return
(
config
,
input_ids
,
token_type_ids
,
input_lengths
,
sequence_labels
,
token_labels
,
is_impossible_labels
,
input_mask
,
)
def
check_loss_output
(
self
,
result
):
self
.
parent
.
assertListEqual
(
list
(
result
[
"loss"
].
size
()),
[])
def
create_and_check_xlm_model
(
self
,
config
,
config
,
input_ids
,
input_ids
,
token_type_ids
,
token_type_ids
,
...
@@ -185,174 +121,209 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
...
@@ -185,174 +121,209 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
token_labels
,
token_labels
,
is_impossible_labels
,
is_impossible_labels
,
input_mask
,
input_mask
,
):
)
model
=
XLMModel
(
config
=
config
)
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
input_ids
,
lengths
=
input_lengths
,
langs
=
token_type_ids
)
outputs
=
model
(
input_ids
,
langs
=
token_type_ids
)
outputs
=
model
(
input_ids
)
sequence_output
=
outputs
[
0
]
result
=
{
"sequence_output"
:
sequence_output
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"sequence_output"
].
size
()),
[
self
.
batch_size
,
self
.
seq_length
,
self
.
hidden_size
]
)
def
create_and_check_xlm_lm_head
(
self
,
config
,
input_ids
,
token_type_ids
,
input_lengths
,
sequence_labels
,
token_labels
,
is_impossible_labels
,
input_mask
,
):
model
=
XLMWithLMHeadModel
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
loss
,
logits
=
model
(
input_ids
,
token_type_ids
=
token_type_ids
,
labels
=
token_labels
)
def
check_loss_output
(
self
,
result
):
self
.
parent
.
assertListEqual
(
list
(
result
[
"loss"
].
size
()),
[])
def
create_and_check_xlm_model
(
self
,
config
,
input_ids
,
token_type_ids
,
input_lengths
,
sequence_labels
,
token_labels
,
is_impossible_labels
,
input_mask
,
):
model
=
XLMModel
(
config
=
config
)
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
input_ids
,
lengths
=
input_lengths
,
langs
=
token_type_ids
)
outputs
=
model
(
input_ids
,
langs
=
token_type_ids
)
outputs
=
model
(
input_ids
)
sequence_output
=
outputs
[
0
]
result
=
{
"sequence_output"
:
sequence_output
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"sequence_output"
].
size
()),
[
self
.
batch_size
,
self
.
seq_length
,
self
.
hidden_size
]
)
result
=
{
def
create_and_check_xlm_lm_head
(
"loss"
:
loss
,
self
,
"logits"
:
logits
,
config
,
}
input_ids
,
token_type_ids
,
input_lengths
,
sequence_labels
,
token_labels
,
is_impossible_labels
,
input_mask
,
):
model
=
XLMWithLMHeadModel
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
loss
,
logits
=
model
(
input_ids
,
token_type_ids
=
token_type_ids
,
labels
=
token_labels
)
result
=
{
"loss"
:
loss
,
"logits"
:
logits
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"loss"
].
size
()),
[])
self
.
parent
.
assertListEqual
(
list
(
result
[
"logits"
].
size
()),
[
self
.
batch_size
,
self
.
seq_length
,
self
.
vocab_size
])
def
create_and_check_xlm_simple_qa
(
self
,
config
,
input_ids
,
token_type_ids
,
input_lengths
,
sequence_labels
,
token_labels
,
is_impossible_labels
,
input_mask
,
):
model
=
XLMForQuestionAnsweringSimple
(
config
)
model
.
to
(
torch_device
)
model
.
eval
()
outputs
=
model
(
input_ids
)
outputs
=
model
(
input_ids
,
start_positions
=
sequence_labels
,
end_positions
=
sequence_labels
)
loss
,
start_logits
,
end_logits
=
outputs
result
=
{
"loss"
:
loss
,
"start_logits"
:
start_logits
,
"end_logits"
:
end_logits
,
}
self
.
parent
.
assertListEqual
(
list
(
result
[
"start_logits"
].
size
()),
[
self
.
batch_size
,
self
.
seq_length
])
self
.
parent
.
assertListEqual
(
list
(
result
[
"end_logits"
].
size
()),
[
self
.
batch_size
,
self
.
seq_length
])
self
.
check_loss_output
(
result
)
    def create_and_check_xlm_qa(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = XLMForQuestionAnswering(config)
        model.to(torch_device)
        model.eval()

        outputs = model(input_ids)
        start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = outputs

        outputs = model(
            input_ids,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
            cls_index=sequence_labels,
            is_impossible=is_impossible_labels,
            p_mask=input_mask,
        )

        outputs = model(
            input_ids,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
            cls_index=sequence_labels,
            is_impossible=is_impossible_labels,
        )

        (total_loss,) = outputs

        outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels)

        (total_loss,) = outputs

        result = {
            "loss": total_loss,
            "start_top_log_probs": start_top_log_probs,
            "start_top_index": start_top_index,
            "end_top_log_probs": end_top_log_probs,
            "end_top_index": end_top_index,
            "cls_logits": cls_logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top]
        )
        self.parent.assertListEqual(
            list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top]
        )
        self.parent.assertListEqual(
            list(result["end_top_log_probs"].size()),
            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
        )
        self.parent.assertListEqual(
            list(result["end_top_index"].size()),
            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
        )
        self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
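The five tensors asserted above are XLM's beam-style QA outputs: the model proposes `start_n_top` start candidates and, for each of them, `end_n_top` end candidates. A minimal sketch of how such outputs could be reduced to one answer span per example, assuming only the shapes checked in this test; `best_span` is a hypothetical helper, not part of the test file:

    import torch

    def best_span(start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, end_n_top):
        # start_top_log_probs / start_top_index: [batch, start_n_top]
        # end_top_log_probs / end_top_index:     [batch, start_n_top * end_n_top]
        batch_size, start_n_top = start_top_log_probs.size()
        end_scores = end_top_log_probs.view(batch_size, start_n_top, end_n_top)
        # Joint log-probability of every (start candidate, end candidate) pair.
        joint = start_top_log_probs.unsqueeze(-1) + end_scores
        flat_best = joint.view(batch_size, -1).argmax(dim=-1)
        s = flat_best // end_n_top  # index of the winning start candidate
        e = flat_best % end_n_top   # index of the winning end candidate for that start
        rows = torch.arange(batch_size)
        starts = start_top_index[rows, s]
        ends = end_top_index.view(batch_size, start_n_top, end_n_top)[rows, s, e]
        return starts, ends  # token positions, one (start, end) span per example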
    def create_and_check_xlm_sequence_classif(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = XLMForSequenceClassification(config)
        model.to(torch_device)
        model.eval()

        (logits,) = model(input_ids)
        loss, logits = model(input_ids, labels=sequence_labels)

        result = {
            "loss": loss,
            "logits": logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size])
    def create_and_check_xlm_for_token_classification(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        config.num_labels = self.num_labels
        model = XLMForTokenClassification(config)
        model.to(torch_device)
        model.eval()

        loss, logits = model(input_ids, attention_mask=input_mask, labels=token_labels)
        result = {
            "loss": loss,
            "logits": logits,
        }
        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])
        self.check_loss_output(result)
    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths}
        return config, inputs_dict

...
@@ -361,39 +332,31 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
...

@require_torch
class XLMModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (
            XLMModel,
            XLMWithLMHeadModel,
            XLMForQuestionAnswering,
            XLMForSequenceClassification,
            XLMForQuestionAnsweringSimple,
        )
        if is_torch_available()
        else ()
    )
    all_generative_model_classes = (
        (XLMWithLMHeadModel,) if is_torch_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable
    def setUp(self):
        self.model_tester = XLMModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)

    def test_config(self):
...
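The point of the hoist shows in `setUp` above: `XLMModelTester` is now instantiated directly rather than through the nested `XLMModelTest.XLMModelTester`. A short sketch of the reuse this enables, assuming the module is importable as `tests.test_modeling_xlm` (import path and test class name are illustrative):

    import unittest

    from tests.test_modeling_xlm import XLMModelTester  # illustrative import path


    class XLMSmokeTest(unittest.TestCase):
        # The hoisted tester only needs a TestCase-like `parent` to route its assertions.
        def setUp(self):
            self.model_tester = XLMModelTester(self)

        def test_sequence_classification_shapes(self):
            config_and_inputs = self.model_tester.prepare_config_and_inputs()
            self.model_tester.create_and_check_xlm_sequence_classif(*config_and_inputs)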
tests/test_modeling_xlnet.py
View file @ c852036b
...
@@ -39,148 +39,106 @@ if is_torch_available():
...
    from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_LIST
class XLNetModelTester:
    def __init__(
        self, parent,
    ):
        self.parent = parent
        self.batch_size = 14
        self.seq_length = 7
        self.mem_len = 10
        # self.key_len = seq_length + mem_len
        self.clamp_len = -1
        self.reuse_len = 15
        self.is_training = True
        self.use_labels = True
        self.vocab_size = 99
        self.cutoffs = [10, 50, 80]
        self.hidden_size = 32
        self.num_attention_heads = 4
        self.d_inner = 128
        self.num_hidden_layers = 5
        self.type_sequence_label_size = 2
        self.untie_r = True
        self.bi_data = False
        self.same_length = False
        self.initializer_range = 0.05
        self.seed = 1
        self.type_vocab_size = 2
        self.bos_token_id = 1
        self.eos_token_id = 2
        self.pad_token_id = 5
        self.num_choices = 4
    def prepare_config_and_inputs(self):
        input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
        input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float()

        input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
        perm_mask = torch.zeros(
            self.batch_size, self.seq_length + 1, self.seq_length + 1, dtype=torch.float, device=torch_device,
        )
        perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
        target_mapping = torch.zeros(
            self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device,
        )
        target_mapping[:, 0, -1] = 1.0  # predict last token

        sequence_labels = None
        lm_labels = None
        is_impossible_labels = None
        token_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            is_impossible_labels = ids_tensor([self.batch_size], 2).float()
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        config = XLNetConfig(
            vocab_size=self.vocab_size,
            d_model=self.hidden_size,
            n_head=self.num_attention_heads,
            d_inner=self.d_inner,
            n_layer=self.num_hidden_layers,
            untie_r=self.untie_r,
            mem_len=self.mem_len,
            clamp_len=self.clamp_len,
            same_length=self.same_length,
            reuse_len=self.reuse_len,
            bi_data=self.bi_data,
            initializer_range=self.initializer_range,
            num_labels=self.type_sequence_label_size,
            bos_token_id=self.bos_token_id,
            pad_token_id=self.pad_token_id,
            eos_token_id=self.eos_token_id,
        )

        return (
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        )
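The `perm_mask`/`target_mapping` pair built above drives XLNet's partial prediction: `perm_mask[b, i, j] = 1` hides token `j` from token `i`, and `target_mapping` selects which positions get scored. A standalone sketch of the same construction, assuming only `torch` (values mirror the tester):

    import torch

    batch_size, seq_length = 14, 7

    # perm_mask[b, i, j] = 1.0 means position i may NOT attend to position j.
    perm_mask = torch.zeros(batch_size, seq_length + 1, seq_length + 1)
    perm_mask[:, :, -1] = 1.0  # previous tokens don't see the last token

    # target_mapping[b, k, j] = 1.0 marks position j as the k-th prediction target.
    target_mapping = torch.zeros(batch_size, 1, seq_length + 1)
    target_mapping[:, 0, -1] = 1.0  # predict only the last token

    # Passed to XLNetLMHeadModel as in create_and_check_xlnet_lm_head below, this
    # yields logits with one row per target rather than one per input position.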
    def set_seed(self):
        random.seed(self.seed)
        torch.manual_seed(self.seed)

...
@@ -193,231 +151,286 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
...

    def create_and_check_xlnet_base_model(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetModel(config)
        model.to(torch_device)
        model.eval()

        _, _ = model(input_ids_1, input_mask=input_mask)
        _, _ = model(input_ids_1, attention_mask=input_mask)
        _, _ = model(input_ids_1, token_type_ids=segment_ids)
        outputs, mems_1 = model(input_ids_1)

        result = {
            "mems_1": mems_1,
            "outputs": outputs,
        }

        config.mem_len = 0
        model = XLNetModel(config)
        model.to(torch_device)
        model.eval()
        no_mems_outputs = model(input_ids_1)
        self.parent.assertEqual(len(no_mems_outputs), 1)

        self.parent.assertListEqual(
            list(result["outputs"].size()), [self.batch_size, self.seq_length, self.hidden_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_base_model_with_att_output(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetModel(config)
        model.to(torch_device)
        model.eval()

        _, _, attentions = model(input_ids_1, target_mapping=target_mapping, output_attentions=True)

        self.parent.assertEqual(len(attentions), config.n_layer)
        self.parent.assertIsInstance(attentions[0], tuple)
        self.parent.assertEqual(len(attentions[0]), 2)
        self.parent.assertTrue(attentions[0][0].shape, attentions[0][0].shape)
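`create_and_check_xlnet_base_model` above exercises both `input_mask` and `attention_mask`, which encode the same information with opposite polarity. A one-line sketch of the relationship, assuming the usual XLNet convention (1 marks padding in `input_mask`, 1 marks real tokens in `attention_mask`):

    import torch

    attention_mask = torch.tensor([[1, 1, 1, 1, 1, 0, 0]]).float()  # 1 = token to attend to
    input_mask = 1.0 - attention_mask                               # 1 = position to mask out
    # model(input_ids, input_mask=input_mask) and
    # model(input_ids, attention_mask=attention_mask) should be equivalent calls here.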
    def create_and_check_xlnet_lm_head(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetLMHeadModel(config)
        model.to(torch_device)
        model.eval()

        loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels)

        loss_2, all_logits_2, mems_2 = model(
            input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1
        )

        logits, _ = model(input_ids_q, perm_mask=perm_mask, target_mapping=target_mapping)

        result = {
            "loss_1": loss_1,
            "mems_1": mems_1,
            "all_logits_1": all_logits_1,
            "loss_2": loss_2,
            "mems_2": mems_2,
            "all_logits_2": all_logits_2,
        }

        self.parent.assertListEqual(list(result["loss_1"].size()), [])
        self.parent.assertListEqual(
            list(result["all_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
        self.parent.assertListEqual(list(result["loss_2"].size()), [])
        self.parent.assertListEqual(
            list(result["all_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
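The `mems` threading in `create_and_check_xlnet_lm_head` is XLNet's Transformer-XL style cache: hidden states from the first segment are reused when scoring the second, which is why `mems_2` ends up with `mem_len` cached positions per layer. A runnable sketch under the same tiny configuration, assuming the tuple outputs this (June 2020) version of the library returns:

    import torch
    from transformers import XLNetConfig, XLNetLMHeadModel

    # Tiny config so the sketch runs quickly on CPU; values mirror the tester above.
    config = XLNetConfig(
        vocab_size=99, d_model=32, n_head=4, d_inner=128, n_layer=5, mem_len=10,
    )
    model = XLNetLMHeadModel(config)
    model.eval()

    segment_1 = torch.randint(0, 99, (1, 7))
    segment_2 = torch.randint(0, 99, (1, 7))

    with torch.no_grad():
        _, mems = model(segment_1)               # one [7, 1, 32] tensor per layer
        _, mems_2 = model(segment_2, mems=mems)  # cache lets segment 2 attend to segment 1
    # mems_2[0].size(0) is capped at mem_len (10 positions kept per layer).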
    def create_and_check_xlnet_qa(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetForQuestionAnswering(config)
        model.to(torch_device)
        model.eval()

        outputs = model(input_ids_1)
        (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits, mems,) = outputs

        outputs = model(
            input_ids_1,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
            cls_index=sequence_labels,
            is_impossible=is_impossible_labels,
            p_mask=input_mask,
        )

        outputs = model(
            input_ids_1,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
            cls_index=sequence_labels,
            is_impossible=is_impossible_labels,
        )

        total_loss, mems = outputs

        outputs = model(input_ids_1, start_positions=sequence_labels, end_positions=sequence_labels,)

        total_loss, mems = outputs

        result = {
            "loss": total_loss,
            "start_top_log_probs": start_top_log_probs,
            "start_top_index": start_top_index,
            "end_top_log_probs": end_top_log_probs,
            "end_top_index": end_top_index,
            "cls_logits": cls_logits,
            "mems": mems,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top],
        )
        self.parent.assertListEqual(
            list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top],
        )
        self.parent.assertListEqual(
            list(result["end_top_log_probs"].size()),
            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
        )
        self.parent.assertListEqual(
            list(result["end_top_index"].size()),
            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
        )
        self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
    def create_and_check_xlnet_token_classif(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetForTokenClassification(config)
        model.to(torch_device)
        model.eval()

        logits, mems_1 = model(input_ids_1)
        loss, logits, mems_1 = model(input_ids_1, labels=token_labels)

        result = {
            "loss": loss,
            "mems_1": mems_1,
            "logits": logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["logits"].size()), [self.batch_size, self.seq_length, self.type_sequence_label_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
    def create_and_check_xlnet_sequence_classif(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetForSequenceClassification(config)
        model.to(torch_device)
        model.eval()

        logits, mems_1 = model(input_ids_1)
        loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels)

        result = {
            "loss": loss,
            "mems_1": mems_1,
            "logits": logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids_1}
        return config, inputs_dict

...
@@ -430,50 +443,33 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
...

@require_torch
class XLNetModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (
            XLNetModel,
            XLNetLMHeadModel,
            XLNetForTokenClassification,
            XLNetForSequenceClassification,
            XLNetForQuestionAnswering,
            XLNetForMultipleChoice,
        )
        if is_torch_available()
        else ()
    )
    all_generative_model_classes = (
        (XLNetLMHeadModel,) if is_torch_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable
    test_pruning = False
    def setUp(self):
        self.model_tester = XLNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)

    def test_config(self):
...