Unverified Commit c852036b authored by Amil Khare's avatar Amil Khare Committed by GitHub
Browse files

[cleanup] Hoist ModelTester objects to top level (#4939)


Co-authored-by: default avatarSam Shleifer <sshleifer@gmail.com>
parent 0c55a384
......@@ -35,137 +35,83 @@ if is_tf_available():
)
@require_tf
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
if is_tf_available()
else ()
)
all_generative_model_classes = (
(TFXLMWithLMHeadModel,) if is_tf_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
class TFXLMModelTester(object):
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_lengths=True,
use_token_type_ids=True,
use_labels=True,
gelu_activation=True,
sinusoidal_embeddings=False,
causal=False,
asm=False,
n_langs=2,
vocab_size=99,
n_special=0,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
summary_type="last",
use_proj=True,
scope=None,
bos_token_id=0,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_lengths = use_input_lengths
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.gelu_activation = gelu_activation
self.sinusoidal_embeddings = sinusoidal_embeddings
self.asm = asm
self.n_langs = n_langs
self.vocab_size = vocab_size
self.n_special = n_special
self.summary_type = summary_type
self.causal = causal
self.use_proj = use_proj
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.n_langs = n_langs
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.summary_type = summary_type
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.bos_token_id = bos_token_id
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)
input_lengths = None
if self.use_input_lengths:
input_lengths = (
ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
) # small variation of seq_length
token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)
sequence_labels = None
token_labels = None
is_impossible_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)
config = XLMConfig(
vocab_size=self.vocab_size,
n_special=self.n_special,
emb_dim=self.hidden_size,
n_layers=self.num_hidden_layers,
n_heads=self.num_attention_heads,
dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob,
gelu_activation=self.gelu_activation,
sinusoidal_embeddings=self.sinusoidal_embeddings,
asm=self.asm,
causal=self.causal,
n_langs=self.n_langs,
max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range,
summary_type=self.summary_type,
use_proj=self.use_proj,
bos_token_id=self.bos_token_id,
)
return (
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
)
def create_and_check_xlm_model(
self,
class TFXLMModelTester:
def __init__(
self, parent,
):
self.parent = parent
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_lengths = True
self.use_token_type_ids = True
self.use_labels = True
self.gelu_activation = True
self.sinusoidal_embeddings = False
self.causal = False
self.asm = False
self.n_langs = 2
self.vocab_size = 99
self.n_special = 0
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.summary_type = "last"
self.use_proj = True
self.scope = None
self.bos_token_id = 0
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)
input_lengths = None
if self.use_input_lengths:
input_lengths = (
ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
) # small variation of seq_length
token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)
sequence_labels = None
token_labels = None
is_impossible_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)
config = XLMConfig(
vocab_size=self.vocab_size,
n_special=self.n_special,
emb_dim=self.hidden_size,
n_layers=self.num_hidden_layers,
n_heads=self.num_attention_heads,
dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob,
gelu_activation=self.gelu_activation,
sinusoidal_embeddings=self.sinusoidal_embeddings,
asm=self.asm,
causal=self.causal,
n_langs=self.n_langs,
max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range,
summary_type=self.summary_type,
use_proj=self.use_proj,
bos_token_id=self.bos_token_id,
)
return (
config,
input_ids,
token_type_ids,
......@@ -174,23 +120,108 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMModel(config=config)
inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
outputs = model(inputs)
inputs = [input_ids, input_mask]
outputs = model(inputs)
sequence_output = outputs[0]
result = {
"sequence_output": sequence_output.numpy(),
}
self.parent.assertListEqual(
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
)
def create_and_check_xlm_lm_head(
self,
)
def create_and_check_xlm_model(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMModel(config=config)
inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
outputs = model(inputs)
inputs = [input_ids, input_mask]
outputs = model(inputs)
sequence_output = outputs[0]
result = {
"sequence_output": sequence_output.numpy(),
}
self.parent.assertListEqual(
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
)
def create_and_check_xlm_lm_head(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMWithLMHeadModel(config)
inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
outputs = model(inputs)
logits = outputs[0]
result = {
"logits": logits.numpy(),
}
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_xlm_qa(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMForQuestionAnsweringSimple(config)
inputs = {"input_ids": input_ids, "lengths": input_lengths}
start_logits, end_logits = model(inputs)
result = {
"start_logits": start_logits.numpy(),
"end_logits": end_logits.numpy(),
}
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
def create_and_check_xlm_sequence_classif(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMForSequenceClassification(config)
inputs = {"input_ids": input_ids, "lengths": input_lengths}
(logits,) = model(inputs)
result = {
"logits": logits.numpy(),
}
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
......@@ -199,92 +230,30 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMWithLMHeadModel(config)
) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
"langs": token_type_ids,
"lengths": input_lengths,
}
return config, inputs_dict
inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
outputs = model(inputs)
logits = outputs[0]
result = {
"logits": logits.numpy(),
}
self.parent.assertListEqual(
list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size]
)
def create_and_check_xlm_qa(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMForQuestionAnsweringSimple(config)
inputs = {"input_ids": input_ids, "lengths": input_lengths}
start_logits, end_logits = model(inputs)
result = {
"start_logits": start_logits.numpy(),
"end_logits": end_logits.numpy(),
}
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
@require_tf
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
def create_and_check_xlm_sequence_classif(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMForSequenceClassification(config)
inputs = {"input_ids": input_ids, "lengths": input_lengths}
(logits,) = model(inputs)
result = {
"logits": logits.numpy(),
}
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
"langs": token_type_ids,
"lengths": input_lengths,
}
return config, inputs_dict
all_model_classes = (
(TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
if is_tf_available()
else ()
)
all_generative_model_classes = (
(TFXLMWithLMHeadModel,) if is_tf_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
def setUp(self):
self.model_tester = TFXLMModelTest.TFXLMModelTester(self)
self.model_tester = TFXLMModelTester(self)
self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
def test_config(self):
......
......@@ -37,142 +37,80 @@ if is_tf_available():
)
@require_tf
class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFXLNetModel,
TFXLNetLMHeadModel,
TFXLNetForSequenceClassification,
TFXLNetForTokenClassification,
TFXLNetForQuestionAnsweringSimple,
class TFXLNetModelTester:
def __init__(
self, parent,
):
self.parent = parent
self.batch_size = 13
self.seq_length = 7
self.mem_len = 10
# self.key_len = seq_length + mem_len
self.clamp_len = -1
self.reuse_len = 15
self.is_training = True
self.use_labels = True
self.vocab_size = 99
self.cutoffs = [10, 50, 80]
self.hidden_size = 32
self.num_attention_heads = 4
self.d_inner = 128
self.num_hidden_layers = 5
self.type_sequence_label_size = 2
self.untie_r = True
self.bi_data = False
self.same_length = False
self.initializer_range = 0.05
self.seed = 1
self.type_vocab_size = 2
self.bos_token_id = 1
self.eos_token_id = 2
self.pad_token_id = 5
def prepare_config_and_inputs(self):
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)
input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
perm_mask = tf.zeros((self.batch_size, self.seq_length + 1, self.seq_length), dtype=tf.float32)
perm_mask_last = tf.ones((self.batch_size, self.seq_length + 1, 1), dtype=tf.float32)
perm_mask = tf.concat([perm_mask, perm_mask_last], axis=-1)
# perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = tf.zeros((self.batch_size, 1, self.seq_length), dtype=tf.float32)
target_mapping_last = tf.ones((self.batch_size, 1, 1), dtype=tf.float32)
target_mapping = tf.concat([target_mapping, target_mapping_last], axis=-1)
# target_mapping[:, 0, -1] = 1.0 # predict last token
sequence_labels = None
lm_labels = None
is_impossible_labels = None
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)
config = XLNetConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
n_head=self.num_attention_heads,
d_inner=self.d_inner,
n_layer=self.num_hidden_layers,
untie_r=self.untie_r,
mem_len=self.mem_len,
clamp_len=self.clamp_len,
same_length=self.same_length,
reuse_len=self.reuse_len,
bi_data=self.bi_data,
initializer_range=self.initializer_range,
num_labels=self.type_sequence_label_size,
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
eos_token_id=self.eos_token_id,
)
if is_tf_available()
else ()
)
all_generative_model_classes = (
(TFXLNetLMHeadModel,) if is_tf_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
test_pruning = False
class TFXLNetModelTester(object):
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
mem_len=10,
clamp_len=-1,
reuse_len=15,
is_training=True,
use_labels=True,
vocab_size=99,
cutoffs=[10, 50, 80],
hidden_size=32,
num_attention_heads=4,
d_inner=128,
num_hidden_layers=5,
type_sequence_label_size=2,
untie_r=True,
bi_data=False,
same_length=False,
initializer_range=0.05,
seed=1,
type_vocab_size=2,
bos_token_id=1,
eos_token_id=2,
pad_token_id=5,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.mem_len = mem_len
# self.key_len = seq_length + mem_len
self.clamp_len = clamp_len
self.reuse_len = reuse_len
self.is_training = is_training
self.use_labels = use_labels
self.vocab_size = vocab_size
self.cutoffs = cutoffs
self.hidden_size = hidden_size
self.num_attention_heads = num_attention_heads
self.d_inner = d_inner
self.num_hidden_layers = num_hidden_layers
self.bi_data = bi_data
self.untie_r = untie_r
self.same_length = same_length
self.initializer_range = initializer_range
self.seed = seed
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.bos_token_id = bos_token_id
self.pad_token_id = pad_token_id
self.eos_token_id = eos_token_id
def prepare_config_and_inputs(self):
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)
input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
perm_mask = tf.zeros((self.batch_size, self.seq_length + 1, self.seq_length), dtype=tf.float32)
perm_mask_last = tf.ones((self.batch_size, self.seq_length + 1, 1), dtype=tf.float32)
perm_mask = tf.concat([perm_mask, perm_mask_last], axis=-1)
# perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = tf.zeros((self.batch_size, 1, self.seq_length), dtype=tf.float32)
target_mapping_last = tf.ones((self.batch_size, 1, 1), dtype=tf.float32)
target_mapping = tf.concat([target_mapping, target_mapping_last], axis=-1)
# target_mapping[:, 0, -1] = 1.0 # predict last token
sequence_labels = None
lm_labels = None
is_impossible_labels = None
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)
config = XLNetConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
n_head=self.num_attention_heads,
d_inner=self.d_inner,
n_layer=self.num_hidden_layers,
untie_r=self.untie_r,
mem_len=self.mem_len,
clamp_len=self.clamp_len,
same_length=self.same_length,
reuse_len=self.reuse_len,
bi_data=self.bi_data,
initializer_range=self.initializer_range,
num_labels=self.type_sequence_label_size,
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
eos_token_id=self.eos_token_id,
)
return (
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
)
def set_seed(self):
random.seed(self.seed)
tf.random.set_seed(self.seed)
def create_and_check_xlnet_base_model(
self,
return (
config,
input_ids_1,
input_ids_2,
......@@ -184,120 +122,203 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetModel(config)
inputs = {"input_ids": input_ids_1, "input_mask": input_mask, "token_type_ids": segment_ids}
_, _ = model(inputs)
inputs = [input_ids_1, input_mask]
)
outputs, mems_1 = model(inputs)
def set_seed(self):
random.seed(self.seed)
tf.random.set_seed(self.seed)
def create_and_check_xlnet_base_model(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetModel(config)
inputs = {"input_ids": input_ids_1, "input_mask": input_mask, "token_type_ids": segment_ids}
_, _ = model(inputs)
inputs = [input_ids_1, input_mask]
outputs, mems_1 = model(inputs)
result = {
"mems_1": [mem.numpy() for mem in mems_1],
"outputs": outputs.numpy(),
}
config.mem_len = 0
model = TFXLNetModel(config)
no_mems_outputs = model(inputs)
self.parent.assertEqual(len(no_mems_outputs), 1)
self.parent.assertListEqual(
list(result["outputs"].shape), [self.batch_size, self.seq_length, self.hidden_size]
)
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
result = {
"mems_1": [mem.numpy() for mem in mems_1],
"outputs": outputs.numpy(),
}
def create_and_check_xlnet_lm_head(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetLMHeadModel(config)
config.mem_len = 0
model = TFXLNetModel(config)
no_mems_outputs = model(inputs)
self.parent.assertEqual(len(no_mems_outputs), 1)
inputs_1 = {"input_ids": input_ids_1, "token_type_ids": segment_ids}
self.parent.assertListEqual(
list(result["outputs"].shape), [self.batch_size, self.seq_length, self.hidden_size]
)
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
all_logits_1, mems_1 = model(inputs_1)
def create_and_check_xlnet_lm_head(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetLMHeadModel(config)
inputs_2 = {"input_ids": input_ids_2, "mems": mems_1, "token_type_ids": segment_ids}
inputs_1 = {"input_ids": input_ids_1, "token_type_ids": segment_ids}
all_logits_2, mems_2 = model(inputs_2)
all_logits_1, mems_1 = model(inputs_1)
inputs_3 = {"input_ids": input_ids_q, "perm_mask": perm_mask, "target_mapping": target_mapping}
inputs_2 = {"input_ids": input_ids_2, "mems": mems_1, "token_type_ids": segment_ids}
logits, _ = model(inputs_3)
all_logits_2, mems_2 = model(inputs_2)
result = {
"mems_1": [mem.numpy() for mem in mems_1],
"all_logits_1": all_logits_1.numpy(),
"mems_2": [mem.numpy() for mem in mems_2],
"all_logits_2": all_logits_2.numpy(),
}
inputs_3 = {"input_ids": input_ids_q, "perm_mask": perm_mask, "target_mapping": target_mapping}
self.parent.assertListEqual(
list(result["all_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size]
)
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
logits, _ = model(inputs_3)
self.parent.assertListEqual(
list(result["all_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size]
)
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
result = {
"mems_1": [mem.numpy() for mem in mems_1],
"all_logits_1": all_logits_1.numpy(),
"mems_2": [mem.numpy() for mem in mems_2],
"all_logits_2": all_logits_2.numpy(),
}
def create_and_check_xlnet_qa(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetForQuestionAnsweringSimple(config)
inputs = {"input_ids": input_ids_1, "attention_mask": input_mask, "token_type_ids": segment_ids}
start_logits, end_logits, mems = model(inputs)
result = {
"start_logits": start_logits.numpy(),
"end_logits": end_logits.numpy(),
"mems": [m.numpy() for m in mems],
}
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual(
list(result["all_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size]
)
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_sequence_classif(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetForSequenceClassification(config)
logits, mems_1 = model(input_ids_1)
result = {
"mems_1": [mem.numpy() for mem in mems_1],
"logits": logits.numpy(),
}
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual(
list(result["all_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size]
)
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_for_token_classification(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
config.num_labels = input_ids_1.shape[1]
model = TFXLNetForTokenClassification(config)
inputs = {
"input_ids": input_ids_1,
"attention_mask": input_mask,
# 'token_type_ids': token_type_ids
}
logits, mems_1 = model(inputs)
result = {
"mems_1": [mem.numpy() for mem in mems_1],
"logits": logits.numpy(),
}
self.parent.assertListEqual(
list(result["logits"].shape), [self.batch_size, self.seq_length, config.num_labels]
)
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_qa(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetForQuestionAnsweringSimple(config)
inputs = {"input_ids": input_ids_1, "attention_mask": input_mask, "token_type_ids": segment_ids}
start_logits, end_logits, mems = model(inputs)
result = {
"start_logits": start_logits.numpy(),
"end_logits": end_logits.numpy(),
"mems": [m.numpy() for m in mems],
}
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_sequence_classif(
self,
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids_1,
input_ids_2,
......@@ -309,76 +330,32 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetForSequenceClassification(config)
logits, mems_1 = model(input_ids_1)
) = config_and_inputs
inputs_dict = {"input_ids": input_ids_1}
return config, inputs_dict
result = {
"mems_1": [mem.numpy() for mem in mems_1],
"logits": logits.numpy(),
}
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
@require_tf
class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
def create_and_check_xlnet_for_token_classification(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
config.num_labels = input_ids_1.shape[1]
model = TFXLNetForTokenClassification(config)
inputs = {
"input_ids": input_ids_1,
"attention_mask": input_mask,
# 'token_type_ids': token_type_ids
}
logits, mems_1 = model(inputs)
result = {
"mems_1": [mem.numpy() for mem in mems_1],
"logits": logits.numpy(),
}
self.parent.assertListEqual(
list(result["logits"].shape), [self.batch_size, self.seq_length, config.num_labels]
)
self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids_1}
return config, inputs_dict
all_model_classes = (
(
TFXLNetModel,
TFXLNetLMHeadModel,
TFXLNetForSequenceClassification,
TFXLNetForTokenClassification,
TFXLNetForQuestionAnsweringSimple,
)
if is_tf_available()
else ()
)
all_generative_model_classes = (
(TFXLNetLMHeadModel,) if is_tf_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
test_pruning = False
def setUp(self):
self.model_tester = TFXLNetModelTest.TFXLNetModelTester(self)
self.model_tester = TFXLNetModelTester(self)
self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
def test_config(self):
......
......@@ -29,6 +29,137 @@ if is_torch_available():
from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST
class TransfoXLModelTester:
def __init__(
self, parent,
):
self.parent = parent
self.batch_size = 14
self.seq_length = 7
self.mem_len = 30
self.key_length = self.seq_length + self.mem_len
self.clamp_len = 15
self.is_training = True
self.use_labels = True
self.vocab_size = 99
self.cutoffs = [10, 50, 80]
self.hidden_size = 32
self.d_embed = 32
self.num_attention_heads = 4
self.d_head = 8
self.d_inner = 128
self.div_val = 2
self.num_hidden_layers = 5
self.scope = None
self.seed = 1
self.eos_token_id = 0
def prepare_config_and_inputs(self):
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
lm_labels = None
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
config = TransfoXLConfig(
vocab_size=self.vocab_size,
mem_len=self.mem_len,
clamp_len=self.clamp_len,
cutoffs=self.cutoffs,
d_model=self.hidden_size,
d_embed=self.d_embed,
n_head=self.num_attention_heads,
d_head=self.d_head,
d_inner=self.d_inner,
div_val=self.div_val,
n_layer=self.num_hidden_layers,
eos_token_id=self.eos_token_id,
)
return (config, input_ids_1, input_ids_2, lm_labels)
def set_seed(self):
random.seed(self.seed)
torch.manual_seed(self.seed)
def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
model = TransfoXLModel(config)
model.to(torch_device)
model.eval()
hidden_states_1, mems_1 = model(input_ids_1)
hidden_states_2, mems_2 = model(input_ids_2, mems_1)
outputs = {
"hidden_states_1": hidden_states_1,
"mems_1": mems_1,
"hidden_states_2": hidden_states_2,
"mems_2": mems_2,
}
return outputs
def check_transfo_xl_model_output(self, result):
self.parent.assertListEqual(
list(result["hidden_states_1"].size()), [self.batch_size, self.seq_length, self.hidden_size],
)
self.parent.assertListEqual(
list(result["hidden_states_2"].size()), [self.batch_size, self.seq_length, self.hidden_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels):
model = TransfoXLLMHeadModel(config)
model.to(torch_device)
model.eval()
lm_logits_1, mems_1 = model(input_ids_1)
loss_1, _, mems_1 = model(input_ids_1, labels=lm_labels)
lm_logits_2, mems_2 = model(input_ids_2, mems=mems_1)
loss_2, _, mems_2 = model(input_ids_2, labels=lm_labels, mems=mems_1)
outputs = {
"loss_1": loss_1,
"mems_1": mems_1,
"lm_logits_1": lm_logits_1,
"loss_2": loss_2,
"mems_2": mems_2,
"lm_logits_2": lm_logits_2,
}
return outputs
def check_transfo_xl_lm_head_output(self, result):
self.parent.assertListEqual(list(result["loss_1"].size()), [self.batch_size, self.seq_length - 1])
self.parent.assertListEqual(
list(result["lm_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual(list(result["loss_2"].size()), [self.batch_size, self.seq_length - 1])
self.parent.assertListEqual(
list(result["lm_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
inputs_dict = {"input_ids": input_ids_1}
return config, inputs_dict
@require_torch
class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
......@@ -38,155 +169,6 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
test_torchscript = False
test_resize_embeddings = True
class TransfoXLModelTester(object):
def __init__(
self,
parent,
batch_size=14,
seq_length=7,
mem_len=30,
clamp_len=15,
is_training=True,
use_labels=True,
vocab_size=99,
cutoffs=[10, 50, 80],
hidden_size=32,
d_embed=32,
num_attention_heads=4,
d_head=8,
d_inner=128,
div_val=2,
num_hidden_layers=5,
scope=None,
seed=1,
eos_token_id=0,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.mem_len = mem_len
self.key_length = seq_length + mem_len
self.clamp_len = clamp_len
self.is_training = is_training
self.use_labels = use_labels
self.vocab_size = vocab_size
self.cutoffs = cutoffs
self.hidden_size = hidden_size
self.d_embed = d_embed
self.num_attention_heads = num_attention_heads
self.d_head = d_head
self.d_inner = d_inner
self.div_val = div_val
self.num_hidden_layers = num_hidden_layers
self.scope = scope
self.seed = seed
self.eos_token_id = eos_token_id
def prepare_config_and_inputs(self):
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
lm_labels = None
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
config = TransfoXLConfig(
vocab_size=self.vocab_size,
mem_len=self.mem_len,
clamp_len=self.clamp_len,
cutoffs=self.cutoffs,
d_model=self.hidden_size,
d_embed=self.d_embed,
n_head=self.num_attention_heads,
d_head=self.d_head,
d_inner=self.d_inner,
div_val=self.div_val,
n_layer=self.num_hidden_layers,
eos_token_id=self.eos_token_id,
)
return (config, input_ids_1, input_ids_2, lm_labels)
def set_seed(self):
random.seed(self.seed)
torch.manual_seed(self.seed)
def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
model = TransfoXLModel(config)
model.to(torch_device)
model.eval()
hidden_states_1, mems_1 = model(input_ids_1)
hidden_states_2, mems_2 = model(input_ids_2, mems_1)
outputs = {
"hidden_states_1": hidden_states_1,
"mems_1": mems_1,
"hidden_states_2": hidden_states_2,
"mems_2": mems_2,
}
return outputs
def check_transfo_xl_model_output(self, result):
self.parent.assertListEqual(
list(result["hidden_states_1"].size()), [self.batch_size, self.seq_length, self.hidden_size],
)
self.parent.assertListEqual(
list(result["hidden_states_2"].size()), [self.batch_size, self.seq_length, self.hidden_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels):
model = TransfoXLLMHeadModel(config)
model.to(torch_device)
model.eval()
lm_logits_1, mems_1 = model(input_ids_1)
loss_1, _, mems_1 = model(input_ids_1, labels=lm_labels)
lm_logits_2, mems_2 = model(input_ids_2, mems=mems_1)
loss_2, _, mems_2 = model(input_ids_2, labels=lm_labels, mems=mems_1)
outputs = {
"loss_1": loss_1,
"mems_1": mems_1,
"lm_logits_1": lm_logits_1,
"loss_2": loss_2,
"mems_2": mems_2,
"lm_logits_2": lm_logits_2,
}
return outputs
def check_transfo_xl_lm_head_output(self, result):
self.parent.assertListEqual(list(result["loss_1"].size()), [self.batch_size, self.seq_length - 1])
self.parent.assertListEqual(
list(result["lm_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual(list(result["loss_2"].size()), [self.batch_size, self.seq_length - 1])
self.parent.assertListEqual(
list(result["lm_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
inputs_dict = {"input_ids": input_ids_1}
return config, inputs_dict
def check_cutoffs_and_n_token(
self, copied_cutoffs, layer, model_embed, model, model_class, resized_value, vocab_size
):
......@@ -210,7 +192,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
self.assertEqual(model.crit.n_token, vocab_size + resized_value)
def setUp(self):
self.model_tester = TransfoXLModelTest.TransfoXLModelTester(self)
self.model_tester = TransfoXLModelTester(self)
self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
def test_config(self):
......
......@@ -37,146 +37,82 @@ if is_torch_available():
from transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_LIST
@require_torch
class XLMModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
XLMModel,
XLMWithLMHeadModel,
XLMForQuestionAnswering,
XLMForSequenceClassification,
XLMForQuestionAnsweringSimple,
class XLMModelTester:
def __init__(
self, parent,
):
self.parent = parent
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_lengths = True
self.use_token_type_ids = True
self.use_labels = True
self.gelu_activation = True
self.sinusoidal_embeddings = False
self.causal = False
self.asm = False
self.n_langs = 2
self.vocab_size = 99
self.n_special = 0
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.summary_type = "last"
self.use_proj = True
self.scope = None
self.bos_token_id = 0
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float()
input_lengths = None
if self.use_input_lengths:
input_lengths = (
ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
) # small variation of seq_length
token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)
sequence_labels = None
token_labels = None
is_impossible_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
is_impossible_labels = ids_tensor([self.batch_size], 2).float()
config = XLMConfig(
vocab_size=self.vocab_size,
n_special=self.n_special,
emb_dim=self.hidden_size,
n_layers=self.num_hidden_layers,
n_heads=self.num_attention_heads,
dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob,
gelu_activation=self.gelu_activation,
sinusoidal_embeddings=self.sinusoidal_embeddings,
asm=self.asm,
causal=self.causal,
n_langs=self.n_langs,
max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range,
summary_type=self.summary_type,
use_proj=self.use_proj,
bos_token_id=self.bos_token_id,
)
if is_torch_available()
else ()
)
all_generative_model_classes = (
(XLMWithLMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
class XLMModelTester(object):
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_lengths=True,
use_token_type_ids=True,
use_labels=True,
gelu_activation=True,
sinusoidal_embeddings=False,
causal=False,
asm=False,
n_langs=2,
vocab_size=99,
n_special=0,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
summary_type="last",
use_proj=True,
scope=None,
bos_token_id=0,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_lengths = use_input_lengths
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.gelu_activation = gelu_activation
self.sinusoidal_embeddings = sinusoidal_embeddings
self.asm = asm
self.n_langs = n_langs
self.vocab_size = vocab_size
self.n_special = n_special
self.summary_type = summary_type
self.causal = causal
self.use_proj = use_proj
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.n_langs = n_langs
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.summary_type = summary_type
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.bos_token_id = bos_token_id
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float()
input_lengths = None
if self.use_input_lengths:
input_lengths = (
ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
) # small variation of seq_length
token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)
sequence_labels = None
token_labels = None
is_impossible_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
is_impossible_labels = ids_tensor([self.batch_size], 2).float()
config = XLMConfig(
vocab_size=self.vocab_size,
n_special=self.n_special,
emb_dim=self.hidden_size,
n_layers=self.num_hidden_layers,
n_heads=self.num_attention_heads,
dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob,
gelu_activation=self.gelu_activation,
sinusoidal_embeddings=self.sinusoidal_embeddings,
asm=self.asm,
causal=self.causal,
n_langs=self.n_langs,
max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range,
summary_type=self.summary_type,
use_proj=self.use_proj,
bos_token_id=self.bos_token_id,
)
return (
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
)
def check_loss_output(self, result):
self.parent.assertListEqual(list(result["loss"].size()), [])
def create_and_check_xlm_model(
self,
return (
config,
input_ids,
token_type_ids,
......@@ -185,174 +121,209 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMModel(config=config)
model.to(torch_device)
model.eval()
outputs = model(input_ids, lengths=input_lengths, langs=token_type_ids)
outputs = model(input_ids, langs=token_type_ids)
outputs = model(input_ids)
sequence_output = outputs[0]
result = {
"sequence_output": sequence_output,
}
self.parent.assertListEqual(
list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
)
def create_and_check_xlm_lm_head(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMWithLMHeadModel(config)
model.to(torch_device)
model.eval()
)
loss, logits = model(input_ids, token_type_ids=token_type_ids, labels=token_labels)
def check_loss_output(self, result):
self.parent.assertListEqual(list(result["loss"].size()), [])
def create_and_check_xlm_model(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMModel(config=config)
model.to(torch_device)
model.eval()
outputs = model(input_ids, lengths=input_lengths, langs=token_type_ids)
outputs = model(input_ids, langs=token_type_ids)
outputs = model(input_ids)
sequence_output = outputs[0]
result = {
"sequence_output": sequence_output,
}
self.parent.assertListEqual(
list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
)
result = {
"loss": loss,
"logits": logits,
}
def create_and_check_xlm_lm_head(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMWithLMHeadModel(config)
model.to(torch_device)
model.eval()
loss, logits = model(input_ids, token_type_ids=token_type_ids, labels=token_labels)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_xlm_simple_qa(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMForQuestionAnsweringSimple(config)
model.to(torch_device)
model.eval()
outputs = model(input_ids)
outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels)
loss, start_logits, end_logits = outputs
result = {
"loss": loss,
"start_logits": start_logits,
"end_logits": end_logits,
}
self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
self.check_loss_output(result)
def create_and_check_xlm_qa(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMForQuestionAnswering(config)
model.to(torch_device)
model.eval()
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
)
outputs = model(input_ids)
start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = outputs
def create_and_check_xlm_simple_qa(
self,
config,
outputs = model(
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMForQuestionAnsweringSimple(config)
model.to(torch_device)
model.eval()
outputs = model(input_ids)
outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels)
loss, start_logits, end_logits = outputs
result = {
"loss": loss,
"start_logits": start_logits,
"end_logits": end_logits,
}
self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
self.check_loss_output(result)
def create_and_check_xlm_qa(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMForQuestionAnswering(config)
model.to(torch_device)
model.eval()
outputs = model(input_ids)
start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = outputs
outputs = model(
input_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
cls_index=sequence_labels,
is_impossible=is_impossible_labels,
p_mask=input_mask,
)
outputs = model(
input_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
cls_index=sequence_labels,
is_impossible=is_impossible_labels,
)
(total_loss,) = outputs
outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels)
(total_loss,) = outputs
result = {
"loss": total_loss,
"start_top_log_probs": start_top_log_probs,
"start_top_index": start_top_index,
"end_top_log_probs": end_top_log_probs,
"end_top_index": end_top_index,
"cls_logits": cls_logits,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top]
)
self.parent.assertListEqual(
list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top]
)
self.parent.assertListEqual(
list(result["end_top_log_probs"].size()),
[self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(
list(result["end_top_index"].size()),
[self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
def create_and_check_xlm_sequence_classif(
self,
config,
start_positions=sequence_labels,
end_positions=sequence_labels,
cls_index=sequence_labels,
is_impossible=is_impossible_labels,
p_mask=input_mask,
)
outputs = model(
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMForSequenceClassification(config)
model.to(torch_device)
model.eval()
(logits,) = model(input_ids)
loss, logits = model(input_ids, labels=sequence_labels)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size]
)
def create_and_check_xlm_for_token_classification(
self,
start_positions=sequence_labels,
end_positions=sequence_labels,
cls_index=sequence_labels,
is_impossible=is_impossible_labels,
)
(total_loss,) = outputs
outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels)
(total_loss,) = outputs
result = {
"loss": total_loss,
"start_top_log_probs": start_top_log_probs,
"start_top_index": start_top_index,
"end_top_log_probs": end_top_log_probs,
"end_top_index": end_top_index,
"cls_logits": cls_logits,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top]
)
self.parent.assertListEqual(
list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top]
)
self.parent.assertListEqual(
list(result["end_top_log_probs"].size()),
[self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(
list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
def create_and_check_xlm_sequence_classif(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = XLMForSequenceClassification(config)
model.to(torch_device)
model.eval()
(logits,) = model(input_ids)
loss, logits = model(input_ids, labels=sequence_labels)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size])
def create_and_check_xlm_for_token_classification(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
config.num_labels = self.num_labels
model = XLMForTokenClassification(config)
model.to(torch_device)
model.eval()
loss, logits = model(input_ids, attention_mask=input_mask, labels=token_labels)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])
self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
......@@ -361,39 +332,31 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
token_labels,
is_impossible_labels,
input_mask,
):
config.num_labels = self.num_labels
model = XLMForTokenClassification(config)
model.to(torch_device)
model.eval()
loss, logits = model(input_ids, attention_mask=input_mask, labels=token_labels)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
)
self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths}
return config, inputs_dict
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths}
return config, inputs_dict
@require_torch
class XLMModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
XLMModel,
XLMWithLMHeadModel,
XLMForQuestionAnswering,
XLMForSequenceClassification,
XLMForQuestionAnsweringSimple,
)
if is_torch_available()
else ()
)
all_generative_model_classes = (
(XLMWithLMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
def setUp(self):
self.model_tester = XLMModelTest.XLMModelTester(self)
self.model_tester = XLMModelTester(self)
self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
def test_config(self):
......
......@@ -39,148 +39,106 @@ if is_torch_available():
from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_LIST
@require_torch
class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
XLNetModel,
XLNetLMHeadModel,
XLNetForTokenClassification,
XLNetForSequenceClassification,
XLNetForQuestionAnswering,
XLNetForMultipleChoice,
class XLNetModelTester:
def __init__(
self,
parent,
batch_size=14,
seq_length=7,
mem_len=10,
clamp_len=-1,
reuse_len=15,
is_training=True,
use_labels=True,
vocab_size=99,
cutoffs=[10, 50, 80],
hidden_size=32,
num_attention_heads=4,
d_inner=128,
num_hidden_layers=5,
type_sequence_label_size=2,
untie_r=True,
bi_data=False,
same_length=False,
initializer_range=0.05,
seed=1,
type_vocab_size=2,
bos_token_id=1,
eos_token_id=2,
pad_token_id=5,
num_choices=4,
):
self.parent = parent
self.batch_size = 14
self.seq_length = 7
self.mem_len = 10
# self.key_len = seq_length + mem_len
self.clamp_len = -1
self.reuse_len = 15
self.is_training = True
self.use_labels = True
self.vocab_size = 99
self.cutoffs = [10, 50, 80]
self.hidden_size = 32
self.num_attention_heads = 4
self.d_inner = 128
self.num_hidden_layers = 5
self.type_sequence_label_size = 2
self.untie_r = True
self.bi_data = False
self.same_length = False
self.initializer_range = 0.05
self.seed = 1
self.type_vocab_size = 2
self.bos_token_id = 1
self.eos_token_id = 2
self.pad_token_id = 5
self.num_choices = 4
def prepare_config_and_inputs(self):
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float()
input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
perm_mask = torch.zeros(
self.batch_size, self.seq_length + 1, self.seq_length + 1, dtype=torch.float, device=torch_device,
)
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = torch.zeros(self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device,)
target_mapping[:, 0, -1] = 1.0 # predict last token
sequence_labels = None
lm_labels = None
is_impossible_labels = None
token_labels = None
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
is_impossible_labels = ids_tensor([self.batch_size], 2).float()
token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
config = XLNetConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
n_head=self.num_attention_heads,
d_inner=self.d_inner,
n_layer=self.num_hidden_layers,
untie_r=self.untie_r,
mem_len=self.mem_len,
clamp_len=self.clamp_len,
same_length=self.same_length,
reuse_len=self.reuse_len,
bi_data=self.bi_data,
initializer_range=self.initializer_range,
num_labels=self.type_sequence_label_size,
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
eos_token_id=self.eos_token_id,
)
if is_torch_available()
else ()
)
all_generative_model_classes = (
(XLNetLMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
test_pruning = False
class XLNetModelTester(object):
def __init__(
self,
parent,
batch_size=14,
seq_length=7,
mem_len=10,
clamp_len=-1,
reuse_len=15,
is_training=True,
use_labels=True,
vocab_size=99,
cutoffs=[10, 50, 80],
hidden_size=32,
num_attention_heads=4,
d_inner=128,
num_hidden_layers=5,
type_sequence_label_size=2,
untie_r=True,
bi_data=False,
same_length=False,
initializer_range=0.05,
seed=1,
type_vocab_size=2,
bos_token_id=1,
eos_token_id=2,
pad_token_id=5,
num_choices=4,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.mem_len = mem_len
# self.key_len = seq_length + mem_len
self.clamp_len = clamp_len
self.reuse_len = reuse_len
self.is_training = is_training
self.use_labels = use_labels
self.vocab_size = vocab_size
self.cutoffs = cutoffs
self.hidden_size = hidden_size
self.num_attention_heads = num_attention_heads
self.d_inner = d_inner
self.num_hidden_layers = num_hidden_layers
self.bi_data = bi_data
self.untie_r = untie_r
self.same_length = same_length
self.initializer_range = initializer_range
self.seed = seed
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.bos_token_id = bos_token_id
self.pad_token_id = pad_token_id
self.eos_token_id = eos_token_id
self.num_choices = num_choices
def prepare_config_and_inputs(self):
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float()
input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
perm_mask = torch.zeros(
self.batch_size, self.seq_length + 1, self.seq_length + 1, dtype=torch.float, device=torch_device,
)
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = torch.zeros(
self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device,
)
target_mapping[:, 0, -1] = 1.0 # predict last token
sequence_labels = None
lm_labels = None
is_impossible_labels = None
token_labels = None
if self.use_labels:
lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
is_impossible_labels = ids_tensor([self.batch_size], 2).float()
token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
config = XLNetConfig(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
n_head=self.num_attention_heads,
d_inner=self.d_inner,
n_layer=self.num_hidden_layers,
untie_r=self.untie_r,
mem_len=self.mem_len,
clamp_len=self.clamp_len,
same_length=self.same_length,
reuse_len=self.reuse_len,
bi_data=self.bi_data,
initializer_range=self.initializer_range,
num_labels=self.type_sequence_label_size,
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
eos_token_id=self.eos_token_id,
)
return (
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
)
def set_seed(self):
random.seed(self.seed)
torch.manual_seed(self.seed)
def create_and_check_xlnet_base_model(
self,
return (
config,
input_ids_1,
input_ids_2,
......@@ -193,231 +151,286 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetModel(config)
model.to(torch_device)
model.eval()
_, _ = model(input_ids_1, input_mask=input_mask)
_, _ = model(input_ids_1, attention_mask=input_mask)
_, _ = model(input_ids_1, token_type_ids=segment_ids)
outputs, mems_1 = model(input_ids_1)
result = {
"mems_1": mems_1,
"outputs": outputs,
}
config.mem_len = 0
model = XLNetModel(config)
model.to(torch_device)
model.eval()
no_mems_outputs = model(input_ids_1)
self.parent.assertEqual(len(no_mems_outputs), 1)
self.parent.assertListEqual(
list(result["outputs"].size()), [self.batch_size, self.seq_length, self.hidden_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_base_model_with_att_output(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetModel(config)
model.to(torch_device)
model.eval()
)
_, _, attentions = model(input_ids_1, target_mapping=target_mapping, output_attentions=True)
def set_seed(self):
random.seed(self.seed)
torch.manual_seed(self.seed)
def create_and_check_xlnet_base_model(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetModel(config)
model.to(torch_device)
model.eval()
self.parent.assertEqual(len(attentions), config.n_layer)
self.parent.assertIsInstance(attentions[0], tuple)
self.parent.assertEqual(len(attentions[0]), 2)
self.parent.assertTrue(attentions[0][0].shape, attentions[0][0].shape)
_, _ = model(input_ids_1, input_mask=input_mask)
_, _ = model(input_ids_1, attention_mask=input_mask)
_, _ = model(input_ids_1, token_type_ids=segment_ids)
outputs, mems_1 = model(input_ids_1)
def create_and_check_xlnet_lm_head(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetLMHeadModel(config)
model.to(torch_device)
model.eval()
loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels)
loss_2, all_logits_2, mems_2 = model(
input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1
)
logits, _ = model(input_ids_q, perm_mask=perm_mask, target_mapping=target_mapping)
result = {
"loss_1": loss_1,
"mems_1": mems_1,
"all_logits_1": all_logits_1,
"loss_2": loss_2,
"mems_2": mems_2,
"all_logits_2": all_logits_2,
}
self.parent.assertListEqual(list(result["loss_1"].size()), [])
self.parent.assertListEqual(
list(result["all_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual(list(result["loss_2"].size()), [])
self.parent.assertListEqual(
list(result["all_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_qa(
self,
config,
result = {
"mems_1": mems_1,
"outputs": outputs,
}
config.mem_len = 0
model = XLNetModel(config)
model.to(torch_device)
model.eval()
no_mems_outputs = model(input_ids_1)
self.parent.assertEqual(len(no_mems_outputs), 1)
self.parent.assertListEqual(
list(result["outputs"].size()), [self.batch_size, self.seq_length, self.hidden_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_base_model_with_att_output(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetModel(config)
model.to(torch_device)
model.eval()
_, _, attentions = model(input_ids_1, target_mapping=target_mapping, output_attentions=True)
self.parent.assertEqual(len(attentions), config.n_layer)
self.parent.assertIsInstance(attentions[0], tuple)
self.parent.assertEqual(len(attentions[0]), 2)
self.parent.assertTrue(attentions[0][0].shape, attentions[0][0].shape)
def create_and_check_xlnet_lm_head(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetLMHeadModel(config)
model.to(torch_device)
model.eval()
loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels)
loss_2, all_logits_2, mems_2 = model(input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1)
logits, _ = model(input_ids_q, perm_mask=perm_mask, target_mapping=target_mapping)
result = {
"loss_1": loss_1,
"mems_1": mems_1,
"all_logits_1": all_logits_1,
"loss_2": loss_2,
"mems_2": mems_2,
"all_logits_2": all_logits_2,
}
self.parent.assertListEqual(list(result["loss_1"].size()), [])
self.parent.assertListEqual(
list(result["all_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual(list(result["loss_2"].size()), [])
self.parent.assertListEqual(
list(result["all_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_qa(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetForQuestionAnswering(config)
model.to(torch_device)
model.eval()
outputs = model(input_ids_1)
(start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits, mems,) = outputs
outputs = model(
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetForQuestionAnswering(config)
model.to(torch_device)
model.eval()
outputs = model(input_ids_1)
(start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits, mems,) = outputs
outputs = model(
input_ids_1,
start_positions=sequence_labels,
end_positions=sequence_labels,
cls_index=sequence_labels,
is_impossible=is_impossible_labels,
p_mask=input_mask,
)
outputs = model(
input_ids_1,
start_positions=sequence_labels,
end_positions=sequence_labels,
cls_index=sequence_labels,
is_impossible=is_impossible_labels,
)
total_loss, mems = outputs
outputs = model(input_ids_1, start_positions=sequence_labels, end_positions=sequence_labels,)
total_loss, mems = outputs
result = {
"loss": total_loss,
"start_top_log_probs": start_top_log_probs,
"start_top_index": start_top_index,
"end_top_log_probs": end_top_log_probs,
"end_top_index": end_top_index,
"cls_logits": cls_logits,
"mems": mems,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top],
)
self.parent.assertListEqual(
list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top],
)
self.parent.assertListEqual(
list(result["end_top_log_probs"].size()),
[self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(
list(result["end_top_index"].size()),
[self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_token_classif(
self,
config,
start_positions=sequence_labels,
end_positions=sequence_labels,
cls_index=sequence_labels,
is_impossible=is_impossible_labels,
p_mask=input_mask,
)
outputs = model(
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetForTokenClassification(config)
model.to(torch_device)
model.eval()
logits, mems_1 = model(input_ids_1)
loss, logits, mems_1 = model(input_ids_1, labels=token_labels)
result = {
"loss": loss,
"mems_1": mems_1,
"logits": logits,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.seq_length, self.type_sequence_label_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_sequence_classif(
self,
start_positions=sequence_labels,
end_positions=sequence_labels,
cls_index=sequence_labels,
is_impossible=is_impossible_labels,
)
total_loss, mems = outputs
outputs = model(input_ids_1, start_positions=sequence_labels, end_positions=sequence_labels,)
total_loss, mems = outputs
result = {
"loss": total_loss,
"start_top_log_probs": start_top_log_probs,
"start_top_index": start_top_index,
"end_top_log_probs": end_top_log_probs,
"end_top_index": end_top_index,
"cls_logits": cls_logits,
"mems": mems,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top],
)
self.parent.assertListEqual(
list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top],
)
self.parent.assertListEqual(
list(result["end_top_log_probs"].size()),
[self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(
list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_token_classif(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetForTokenClassification(config)
model.to(torch_device)
model.eval()
logits, mems_1 = model(input_ids_1)
loss, logits, mems_1 = model(input_ids_1, labels=token_labels)
result = {
"loss": loss,
"mems_1": mems_1,
"logits": logits,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.seq_length, self.type_sequence_label_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_sequence_classif(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetForSequenceClassification(config)
model.to(torch_device)
model.eval()
logits, mems_1 = model(input_ids_1)
loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels)
result = {
"loss": loss,
"mems_1": mems_1,
"logits": logits,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids_1,
input_ids_2,
......@@ -430,50 +443,33 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
sequence_labels,
is_impossible_labels,
token_labels,
):
model = XLNetForSequenceClassification(config)
model.to(torch_device)
model.eval()
logits, mems_1 = model(input_ids_1)
loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels)
result = {
"loss": loss,
"mems_1": mems_1,
"logits": logits,
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size],
)
self.parent.assertListEqual(
list(list(mem.size()) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
token_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids_1}
return config, inputs_dict
) = config_and_inputs
inputs_dict = {"input_ids": input_ids_1}
return config, inputs_dict
@require_torch
class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
XLNetModel,
XLNetLMHeadModel,
XLNetForTokenClassification,
XLNetForSequenceClassification,
XLNetForQuestionAnswering,
XLNetForMultipleChoice,
)
if is_torch_available()
else ()
)
all_generative_model_classes = (
(XLNetLMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Check other models whether language generation is also applicable
test_pruning = False
def setUp(self):
self.model_tester = XLNetModelTest.XLNetModelTester(self)
self.model_tester = XLNetModelTester(self)
self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
def test_config(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment