Unverified Commit c852036b authored by Amil Khare, committed by GitHub

[cleanup] Hoist ModelTester objects to top level (#4939)


Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
parent 0c55a384
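The cleanup applies the same mechanical pattern to five test files: each ModelTester helper that was defined as a nested class inside its unittest.TestCase is hoisted to module level, and setUp drops the qualified TestCase.Tester lookup. A minimal sketch of the pattern (hypothetical names, not from this diff):

import unittest


class Tester:
    # Hoisted helper: module-level, keeping a back-reference to the TestCase
    # so it can call its assert* methods, the way the ModelTester classes
    # below use self.parent.
    def __init__(self, parent):
        self.parent = parent

    def check(self, value):
        self.parent.assertEqual(value, 42)


class ExampleTest(unittest.TestCase):
    def setUp(self):
        # Before the cleanup this read ExampleTest.Tester(self), with Tester
        # defined inside ExampleTest; after hoisting it is a plain name.
        self.tester = Tester(self)

    def test_value(self):
        self.tester.check(42)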
tests/test_modeling_tf_xlm.py
@@ -35,137 +35,83 @@ if is_tf_available():
    )


class TFXLMModelTester:
    def __init__(
        self, parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_lengths = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.gelu_activation = True
        self.sinusoidal_embeddings = False
        self.causal = False
        self.asm = False
        self.n_langs = 2
        self.vocab_size = 99
        self.n_special = 0
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.summary_type = "last"
        self.use_proj = True
        self.scope = None
        self.bos_token_id = 0

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)

        input_lengths = None
        if self.use_input_lengths:
            input_lengths = (
                ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
            )  # small variation of seq_length

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)

        sequence_labels = None
        token_labels = None
        is_impossible_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)

        config = XLMConfig(
            vocab_size=self.vocab_size,
            n_special=self.n_special,
            emb_dim=self.hidden_size,
            n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            gelu_activation=self.gelu_activation,
            sinusoidal_embeddings=self.sinusoidal_embeddings,
            asm=self.asm,
            causal=self.causal,
            n_langs=self.n_langs,
            max_position_embeddings=self.max_position_embeddings,
            initializer_range=self.initializer_range,
            summary_type=self.summary_type,
            use_proj=self.use_proj,
            bos_token_id=self.bos_token_id,
        )

        return (
            config,
            input_ids,
            token_type_ids,
@@ -174,23 +120,108 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
            token_labels,
            is_impossible_labels,
            input_mask,
        )

    def create_and_check_xlm_model(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = TFXLMModel(config=config)
        inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
        outputs = model(inputs)

        inputs = [input_ids, input_mask]
        outputs = model(inputs)
        sequence_output = outputs[0]
        result = {
            "sequence_output": sequence_output.numpy(),
        }
        self.parent.assertListEqual(
            list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )

    def create_and_check_xlm_lm_head(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = TFXLMWithLMHeadModel(config)
        inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
        outputs = model(inputs)
        logits = outputs[0]
        result = {
            "logits": logits.numpy(),
        }
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size])

    def create_and_check_xlm_qa(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = TFXLMForQuestionAnsweringSimple(config)
        inputs = {"input_ids": input_ids, "lengths": input_lengths}
        start_logits, end_logits = model(inputs)
        result = {
            "start_logits": start_logits.numpy(),
            "end_logits": end_logits.numpy(),
        }
        self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])

    def create_and_check_xlm_sequence_classif(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = TFXLMForSequenceClassification(config)
        inputs = {"input_ids": input_ids, "lengths": input_lengths}
        (logits,) = model(inputs)
        result = {
            "logits": logits.numpy(),
        }
        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
@@ -199,92 +230,30 @@ class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):
            token_labels,
            is_impossible_labels,
            input_mask,
        ) = config_and_inputs
        inputs_dict = {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "langs": token_type_ids,
            "lengths": input_lengths,
        }
        return config, inputs_dict


@require_tf
class TFXLMModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
        if is_tf_available()
        else ()
    )
    all_generative_model_classes = (
        (TFXLMWithLMHeadModel,) if is_tf_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable

    def setUp(self):
        self.model_tester = TFXLMModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)

    def test_config(self):
...
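Both the XLM and XLNet TF testers fabricate their inputs with the shared ids_tensor helper from the test utilities. As an illustration of the contract the calls above rely on (shape, exclusive upper bound, optional dtype), here is a minimal reimplementation; the actual helper in the repository may differ in details:

import random

import tensorflow as tf


def ids_tensor(shape, vocab_size, rng=None, dtype=tf.int32):
    # Random tensor of `shape` with integer values in [0, vocab_size).
    rng = rng or random.Random()
    total_dims = 1
    for dim in shape:
        total_dims *= dim
    values = [rng.randint(0, vocab_size - 1) for _ in range(total_dims)]
    return tf.cast(tf.reshape(tf.constant(values), shape), dtype)


input_ids = ids_tensor([13, 7], 99)                    # token ids, as in prepare_config_and_inputs
input_mask = ids_tensor([13, 7], 2, dtype=tf.float32)  # random 0/1 mask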
tests/test_modeling_tf_xlnet.py
@@ -37,142 +37,80 @@ if is_tf_available():
    )


class TFXLNetModelTester:
    def __init__(
        self, parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.mem_len = 10
        # self.key_len = seq_length + mem_len
        self.clamp_len = -1
        self.reuse_len = 15
        self.is_training = True
        self.use_labels = True
        self.vocab_size = 99
        self.cutoffs = [10, 50, 80]
        self.hidden_size = 32
        self.num_attention_heads = 4
        self.d_inner = 128
        self.num_hidden_layers = 5
        self.type_sequence_label_size = 2
        self.untie_r = True
        self.bi_data = False
        self.same_length = False
        self.initializer_range = 0.05
        self.seed = 1
        self.type_vocab_size = 2
        self.bos_token_id = 1
        self.eos_token_id = 2
        self.pad_token_id = 5

    def prepare_config_and_inputs(self):
        input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
        input_mask = ids_tensor([self.batch_size, self.seq_length], 2, dtype=tf.float32)

        input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
        perm_mask = tf.zeros((self.batch_size, self.seq_length + 1, self.seq_length), dtype=tf.float32)
        perm_mask_last = tf.ones((self.batch_size, self.seq_length + 1, 1), dtype=tf.float32)
        perm_mask = tf.concat([perm_mask, perm_mask_last], axis=-1)
        # perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
        target_mapping = tf.zeros((self.batch_size, 1, self.seq_length), dtype=tf.float32)
        target_mapping_last = tf.ones((self.batch_size, 1, 1), dtype=tf.float32)
        target_mapping = tf.concat([target_mapping, target_mapping_last], axis=-1)
        # target_mapping[:, 0, -1] = 1.0  # predict last token

        sequence_labels = None
        lm_labels = None
        is_impossible_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)

        config = XLNetConfig(
            vocab_size=self.vocab_size,
            d_model=self.hidden_size,
            n_head=self.num_attention_heads,
            d_inner=self.d_inner,
            n_layer=self.num_hidden_layers,
            untie_r=self.untie_r,
            mem_len=self.mem_len,
            clamp_len=self.clamp_len,
            same_length=self.same_length,
            reuse_len=self.reuse_len,
            bi_data=self.bi_data,
            initializer_range=self.initializer_range,
            num_labels=self.type_sequence_label_size,
            bos_token_id=self.bos_token_id,
            pad_token_id=self.pad_token_id,
            eos_token_id=self.eos_token_id,
        )

        return (
            config,
            input_ids_1,
            input_ids_2,
@@ -184,120 +122,203 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
            lm_labels,
            sequence_labels,
            is_impossible_labels,
        )

    def set_seed(self):
        random.seed(self.seed)
        tf.random.set_seed(self.seed)

    def create_and_check_xlnet_base_model(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        model = TFXLNetModel(config)

        inputs = {"input_ids": input_ids_1, "input_mask": input_mask, "token_type_ids": segment_ids}
        _, _ = model(inputs)

        inputs = [input_ids_1, input_mask]
        outputs, mems_1 = model(inputs)

        result = {
            "mems_1": [mem.numpy() for mem in mems_1],
            "outputs": outputs.numpy(),
        }

        config.mem_len = 0
        model = TFXLNetModel(config)
        no_mems_outputs = model(inputs)
        self.parent.assertEqual(len(no_mems_outputs), 1)

        self.parent.assertListEqual(
            list(result["outputs"].shape), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_lm_head(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        model = TFXLNetLMHeadModel(config)

        inputs_1 = {"input_ids": input_ids_1, "token_type_ids": segment_ids}

        all_logits_1, mems_1 = model(inputs_1)

        inputs_2 = {"input_ids": input_ids_2, "mems": mems_1, "token_type_ids": segment_ids}

        all_logits_2, mems_2 = model(inputs_2)

        inputs_3 = {"input_ids": input_ids_q, "perm_mask": perm_mask, "target_mapping": target_mapping}

        logits, _ = model(inputs_3)

        result = {
            "mems_1": [mem.numpy() for mem in mems_1],
            "all_logits_1": all_logits_1.numpy(),
            "mems_2": [mem.numpy() for mem in mems_2],
            "all_logits_2": all_logits_2.numpy(),
        }

        self.parent.assertListEqual(
            list(result["all_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

        self.parent.assertListEqual(
            list(result["all_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_qa(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        model = TFXLNetForQuestionAnsweringSimple(config)

        inputs = {"input_ids": input_ids_1, "attention_mask": input_mask, "token_type_ids": segment_ids}
        start_logits, end_logits, mems = model(inputs)

        result = {
            "start_logits": start_logits.numpy(),
            "end_logits": end_logits.numpy(),
            "mems": [m.numpy() for m in mems],
        }

        self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_sequence_classif(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        model = TFXLNetForSequenceClassification(config)

        logits, mems_1 = model(input_ids_1)

        result = {
            "mems_1": [mem.numpy() for mem in mems_1],
            "logits": logits.numpy(),
        }

        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_for_token_classification(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
    ):
        config.num_labels = input_ids_1.shape[1]
        model = TFXLNetForTokenClassification(config)
        inputs = {
            "input_ids": input_ids_1,
            "attention_mask": input_mask,
            # 'token_type_ids': token_type_ids
        }
        logits, mems_1 = model(inputs)
        result = {
            "mems_1": [mem.numpy() for mem in mems_1],
            "logits": logits.numpy(),
        }
        self.parent.assertListEqual(
            list(result["logits"].shape), [self.batch_size, self.seq_length, config.num_labels]
        )
        self.parent.assertListEqual(
            list(list(mem.shape) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids_1,
            input_ids_2,
@@ -309,76 +330,32 @@ class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):
            lm_labels,
            sequence_labels,
            is_impossible_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids_1}
        return config, inputs_dict


@require_tf
class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (
            TFXLNetModel,
            TFXLNetLMHeadModel,
            TFXLNetForSequenceClassification,
            TFXLNetForTokenClassification,
            TFXLNetForQuestionAnsweringSimple,
        )
        if is_tf_available()
        else ()
    )
    all_generative_model_classes = (
        (TFXLNetLMHeadModel,) if is_tf_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable
    test_pruning = False

    def setUp(self):
        self.model_tester = TFXLNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)

    def test_config(self):
...
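The commented-out assignments in prepare_config_and_inputs (# perm_mask[:, :, -1] = 1.0 and # target_mapping[:, 0, -1] = 1.0) record the intent of the zeros/ones/concat construction: tf.Tensor supports no in-place item assignment, so the TF tester builds the "previous tokens don't see last token" mask and the "predict last token" mapping by concatenating a ones slice onto a zeros block. A standalone sketch with toy sizes:

import tensorflow as tf

batch_size, seq_length = 2, 4  # toy sizes; the tester uses 13 and 7

# Equivalent of perm_mask[:, :, -1] = 1.0 on an all-zeros mask.
perm_mask = tf.zeros((batch_size, seq_length + 1, seq_length), dtype=tf.float32)
perm_mask_last = tf.ones((batch_size, seq_length + 1, 1), dtype=tf.float32)
perm_mask = tf.concat([perm_mask, perm_mask_last], axis=-1)

# Equivalent of target_mapping[:, 0, -1] = 1.0: one-hot on the final position.
target_mapping = tf.zeros((batch_size, 1, seq_length), dtype=tf.float32)
target_mapping_last = tf.ones((batch_size, 1, 1), dtype=tf.float32)
target_mapping = tf.concat([target_mapping, target_mapping_last], axis=-1)

assert perm_mask.shape == (batch_size, seq_length + 1, seq_length + 1)
assert target_mapping.shape == (batch_size, 1, seq_length + 1)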
tests/test_modeling_transfo_xl.py
@@ -29,6 +29,137 @@ if is_torch_available():
    from transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST


class TransfoXLModelTester:
    def __init__(
        self, parent,
    ):
        self.parent = parent
        self.batch_size = 14
        self.seq_length = 7
        self.mem_len = 30
        self.key_length = self.seq_length + self.mem_len
        self.clamp_len = 15
        self.is_training = True
        self.use_labels = True
        self.vocab_size = 99
        self.cutoffs = [10, 50, 80]
        self.hidden_size = 32
        self.d_embed = 32
        self.num_attention_heads = 4
        self.d_head = 8
        self.d_inner = 128
        self.div_val = 2
        self.num_hidden_layers = 5
        self.scope = None
        self.seed = 1
        self.eos_token_id = 0

    def prepare_config_and_inputs(self):
        input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        lm_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        config = TransfoXLConfig(
            vocab_size=self.vocab_size,
            mem_len=self.mem_len,
            clamp_len=self.clamp_len,
            cutoffs=self.cutoffs,
            d_model=self.hidden_size,
            d_embed=self.d_embed,
            n_head=self.num_attention_heads,
            d_head=self.d_head,
            d_inner=self.d_inner,
            div_val=self.div_val,
            n_layer=self.num_hidden_layers,
            eos_token_id=self.eos_token_id,
        )

        return (config, input_ids_1, input_ids_2, lm_labels)

    def set_seed(self):
        random.seed(self.seed)
        torch.manual_seed(self.seed)

    def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
        model = TransfoXLModel(config)
        model.to(torch_device)
        model.eval()

        hidden_states_1, mems_1 = model(input_ids_1)
        hidden_states_2, mems_2 = model(input_ids_2, mems_1)
        outputs = {
            "hidden_states_1": hidden_states_1,
            "mems_1": mems_1,
            "hidden_states_2": hidden_states_2,
            "mems_2": mems_2,
        }
        return outputs

    def check_transfo_xl_model_output(self, result):
        self.parent.assertListEqual(
            list(result["hidden_states_1"].size()), [self.batch_size, self.seq_length, self.hidden_size],
        )
        self.parent.assertListEqual(
            list(result["hidden_states_2"].size()), [self.batch_size, self.seq_length, self.hidden_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels):
        model = TransfoXLLMHeadModel(config)
        model.to(torch_device)
        model.eval()

        lm_logits_1, mems_1 = model(input_ids_1)
        loss_1, _, mems_1 = model(input_ids_1, labels=lm_labels)
        lm_logits_2, mems_2 = model(input_ids_2, mems=mems_1)
        loss_2, _, mems_2 = model(input_ids_2, labels=lm_labels, mems=mems_1)

        outputs = {
            "loss_1": loss_1,
            "mems_1": mems_1,
            "lm_logits_1": lm_logits_1,
            "loss_2": loss_2,
            "mems_2": mems_2,
            "lm_logits_2": lm_logits_2,
        }
        return outputs

    def check_transfo_xl_lm_head_output(self, result):
        self.parent.assertListEqual(list(result["loss_1"].size()), [self.batch_size, self.seq_length - 1])
        self.parent.assertListEqual(
            list(result["lm_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

        self.parent.assertListEqual(list(result["loss_2"].size()), [self.batch_size, self.seq_length - 1])
        self.parent.assertListEqual(
            list(result["lm_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
        inputs_dict = {"input_ids": input_ids_1}
        return config, inputs_dict


@require_torch
class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
@@ -38,155 +169,6 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
    test_torchscript = False
    test_resize_embeddings = True

    def check_cutoffs_and_n_token(
        self, copied_cutoffs, layer, model_embed, model, model_class, resized_value, vocab_size
    ):
@@ -210,7 +192,7 @@ class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):
        self.assertEqual(model.crit.n_token, vocab_size + resized_value)

    def setUp(self):
        self.model_tester = TransfoXLModelTester(self)
        self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)

    def test_config(self):
...
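The shape checks in check_transfo_xl_model_output encode Transfo-XL's segment-level recurrence: after any forward pass, each of the n_layer memory tensors holds the last mem_len hidden states as [mem_len, batch_size, hidden_size], however many segments have been fed. A toy, simplified version of that sliding-window update (plain PyTorch; the real model's memory update also handles offsets such as reuse_len and ext_len):

import torch

mem_len, batch_size, hidden_size = 30, 14, 32  # the tester's sizes
num_layers, seq_length = 5, 7


def update_mems(mems, hidden_states):
    # Append the new segment per layer, keep only the last mem_len steps.
    new_mems = []
    for mem, hids in zip(mems, hidden_states):
        cat = torch.cat([mem, hids], dim=0)
        new_mems.append(cat[-mem_len:].detach())
    return new_mems


mems = [torch.zeros(mem_len, batch_size, hidden_size) for _ in range(num_layers)]
segment = [torch.randn(seq_length, batch_size, hidden_size) for _ in range(num_layers)]
mems = update_mems(mems, segment)  # what feeding mems_1 into the second call relies on
assert all(m.shape == (mem_len, batch_size, hidden_size) for m in mems)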
tests/test_modeling_xlm.py
@@ -37,146 +37,82 @@ if is_torch_available():
    from transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_LIST


class XLMModelTester:
    def __init__(
        self, parent,
    ):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_lengths = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.gelu_activation = True
        self.sinusoidal_embeddings = False
        self.causal = False
        self.asm = False
        self.n_langs = 2
        self.vocab_size = 99
        self.n_special = 0
        self.hidden_size = 32
        self.num_hidden_layers = 5
        self.num_attention_heads = 4
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.summary_type = "last"
        self.use_proj = True
        self.scope = None
        self.bos_token_id = 0

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float()

        input_lengths = None
        if self.use_input_lengths:
            input_lengths = (
                ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
            )  # small variation of seq_length

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)

        sequence_labels = None
        token_labels = None
        is_impossible_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            is_impossible_labels = ids_tensor([self.batch_size], 2).float()

        config = XLMConfig(
            vocab_size=self.vocab_size,
            n_special=self.n_special,
            emb_dim=self.hidden_size,
            n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            gelu_activation=self.gelu_activation,
            sinusoidal_embeddings=self.sinusoidal_embeddings,
            asm=self.asm,
            causal=self.causal,
            n_langs=self.n_langs,
            max_position_embeddings=self.max_position_embeddings,
            initializer_range=self.initializer_range,
            summary_type=self.summary_type,
            use_proj=self.use_proj,
            bos_token_id=self.bos_token_id,
        )

        return (
            config,
            input_ids,
            token_type_ids,
@@ -185,174 +121,209 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
            token_labels,
            is_impossible_labels,
            input_mask,
        )

    def check_loss_output(self, result):
        self.parent.assertListEqual(list(result["loss"].size()), [])

    def create_and_check_xlm_model(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = XLMModel(config=config)
        model.to(torch_device)
        model.eval()
        outputs = model(input_ids, lengths=input_lengths, langs=token_type_ids)
        outputs = model(input_ids, langs=token_type_ids)
        outputs = model(input_ids)
        sequence_output = outputs[0]
        result = {
            "sequence_output": sequence_output,
        }
        self.parent.assertListEqual(
            list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
        )

    def create_and_check_xlm_lm_head(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = XLMWithLMHeadModel(config)
        model.to(torch_device)
        model.eval()

        loss, logits = model(input_ids, token_type_ids=token_type_ids, labels=token_labels)

        result = {
            "loss": loss,
            "logits": logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size])

    def create_and_check_xlm_simple_qa(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = XLMForQuestionAnsweringSimple(config)
        model.to(torch_device)
        model.eval()

        outputs = model(input_ids)

        outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels)
        loss, start_logits, end_logits = outputs

        result = {
            "loss": loss,
            "start_logits": start_logits,
            "end_logits": end_logits,
        }
        self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
        self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
        self.check_loss_output(result)

    def create_and_check_xlm_qa(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = XLMForQuestionAnswering(config)
        model.to(torch_device)
        model.eval()

        outputs = model(input_ids)
        start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = outputs

        outputs = model(
            input_ids,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
            cls_index=sequence_labels,
            is_impossible=is_impossible_labels,
            p_mask=input_mask,
        )

        outputs = model(
            input_ids,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
            cls_index=sequence_labels,
            is_impossible=is_impossible_labels,
        )

        (total_loss,) = outputs

        outputs = model(input_ids, start_positions=sequence_labels, end_positions=sequence_labels)

        (total_loss,) = outputs

        result = {
            "loss": total_loss,
            "start_top_log_probs": start_top_log_probs,
            "start_top_index": start_top_index,
            "end_top_log_probs": end_top_log_probs,
            "end_top_index": end_top_index,
            "cls_logits": cls_logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top]
        )
        self.parent.assertListEqual(
            list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top]
        )
        self.parent.assertListEqual(
            list(result["end_top_log_probs"].size()),
            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
        )
        self.parent.assertListEqual(
            list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top],
        )
        self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])

    def create_and_check_xlm_sequence_classif(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        model = XLMForSequenceClassification(config)
        model.to(torch_device)
        model.eval()

        (logits,) = model(input_ids)
        loss, logits = model(input_ids, labels=sequence_labels)

        result = {
            "loss": loss,
            "logits": logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size])

    def create_and_check_xlm_for_token_classification(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        input_mask,
    ):
        config.num_labels = self.num_labels
        model = XLMForTokenClassification(config)
        model.to(torch_device)
        model.eval()

        loss, logits = model(input_ids, attention_mask=input_mask, labels=token_labels)
        result = {
            "loss": loss,
            "logits": logits,
        }
        self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])
        self.check_loss_output(result)

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
@@ -361,39 +332,31 @@ class XLMModelTest(ModelTesterMixin, unittest.TestCase):
            token_labels,
            is_impossible_labels,
            input_mask,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths}
        return config, inputs_dict


@require_torch
class XLMModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (
            XLMModel,
            XLMWithLMHeadModel,
            XLMForQuestionAnswering,
            XLMForSequenceClassification,
            XLMForQuestionAnsweringSimple,
        )
        if is_torch_available()
        else ()
    )
    all_generative_model_classes = (
        (XLMWithLMHeadModel,) if is_torch_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable

    def setUp(self):
        self.model_tester = XLMModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)

    def test_config(self):
...
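check_loss_output asserting size [] reflects that a reduced PyTorch loss is a 0-dimensional tensor, in contrast to the Transfo-XL head above, whose unreduced loss keeps shape [batch_size, seq_length - 1]. A short illustration of the invariant being tested:

import torch
import torch.nn.functional as F

logits = torch.randn(13, 99)            # (batch_size, vocab_size)
labels = torch.randint(0, 99, (13,))    # (batch_size,)
loss = F.cross_entropy(logits, labels)  # mean reduction -> scalar tensor
assert list(loss.size()) == []          # exactly what check_loss_output asserts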
...@@ -39,148 +39,106 @@ if is_torch_available(): ...@@ -39,148 +39,106 @@ if is_torch_available():
from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_LIST from transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_LIST

class XLNetModelTester:
    def __init__(
        self,
        parent,
        batch_size=14,
        seq_length=7,
        mem_len=10,
        clamp_len=-1,
        reuse_len=15,
        is_training=True,
        use_labels=True,
        vocab_size=99,
        cutoffs=[10, 50, 80],
        hidden_size=32,
        num_attention_heads=4,
        d_inner=128,
        num_hidden_layers=5,
        type_sequence_label_size=2,
        untie_r=True,
        bi_data=False,
        same_length=False,
        initializer_range=0.05,
        seed=1,
        type_vocab_size=2,
        bos_token_id=1,
        eos_token_id=2,
        pad_token_id=5,
        num_choices=4,
    ):
        self.parent = parent
        self.batch_size = 14
        self.seq_length = 7
        self.mem_len = 10
        # self.key_len = seq_length + mem_len
        self.clamp_len = -1
        self.reuse_len = 15
        self.is_training = True
        self.use_labels = True
        self.vocab_size = 99
        self.cutoffs = [10, 50, 80]
        self.hidden_size = 32
        self.num_attention_heads = 4
        self.d_inner = 128
        self.num_hidden_layers = 5
        self.type_sequence_label_size = 2
        self.untie_r = True
        self.bi_data = False
        self.same_length = False
        self.initializer_range = 0.05
        self.seed = 1
        self.type_vocab_size = 2
        self.bos_token_id = 1
        self.eos_token_id = 2
        self.pad_token_id = 5
        self.num_choices = 4
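Note the pattern in this __init__: the keyword defaults remain in the signature, but the body assigns literals, so any caller override is silently ignored. A one-line demonstration (directly from the code above):

tester = XLNetModelTester(parent=None, batch_size=32)
assert tester.batch_size == 14  # the hard-coded literal wins over the argument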

    def prepare_config_and_inputs(self):
        input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_ids_2 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        segment_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
        input_mask = ids_tensor([self.batch_size, self.seq_length], 2).float()

        input_ids_q = ids_tensor([self.batch_size, self.seq_length + 1], self.vocab_size)
        perm_mask = torch.zeros(
            self.batch_size, self.seq_length + 1, self.seq_length + 1, dtype=torch.float, device=torch_device,
        )
        perm_mask[:, :, -1] = 1.0  # Previous tokens don't see last token
        target_mapping = torch.zeros(self.batch_size, 1, self.seq_length + 1, dtype=torch.float, device=torch_device,)
        target_mapping[:, 0, -1] = 1.0  # predict last token

        sequence_labels = None
        lm_labels = None
        is_impossible_labels = None
        token_labels = None
        if self.use_labels:
            lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            is_impossible_labels = ids_tensor([self.batch_size], 2).float()
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        config = XLNetConfig(
            vocab_size=self.vocab_size,
            d_model=self.hidden_size,
            n_head=self.num_attention_heads,
            d_inner=self.d_inner,
            n_layer=self.num_hidden_layers,
            untie_r=self.untie_r,
            mem_len=self.mem_len,
            clamp_len=self.clamp_len,
            same_length=self.same_length,
            reuse_len=self.reuse_len,
            bi_data=self.bi_data,
            initializer_range=self.initializer_range,
            num_labels=self.type_sequence_label_size,
            bos_token_id=self.bos_token_id,
            pad_token_id=self.pad_token_id,
            eos_token_id=self.eos_token_id,
        )

        return (
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        )
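For illustration, here is what the permutation inputs built above encode, as a standalone PyTorch sketch (tensor sizes copied from this tester; the convention stated in the comments is XLNet's documented one):

import torch

batch_size, q_len = 14, 7 + 1  # mirrors input_ids_q above

# perm_mask[b, i, j] == 1.0 means token i may NOT attend to token j, so
# setting the last column hides the final token from every other position.
perm_mask = torch.zeros(batch_size, q_len, q_len)
perm_mask[:, :, -1] = 1.0

# target_mapping[b, k, j] == 1.0 marks token j as the k-th prediction target;
# here there is a single target, the last position.
target_mapping = torch.zeros(batch_size, 1, q_len)
target_mapping[:, 0, -1] = 1.0

assert perm_mask[0, 0, -1] == 1.0 and perm_mask[0, 0, 0] == 0.0
assert target_mapping[0, 0].sum() == 1.0  # exactly one target per mapping row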

@@ -193,231 +151,286 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):

    def set_seed(self):
        random.seed(self.seed)
        torch.manual_seed(self.seed)
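Seeding both RNGs makes the fixtures above reproducible, since ids_tensor draws from them. A trivial sketch of the guarantee:

import random

import torch

random.seed(1)
torch.manual_seed(1)
first = torch.randint(0, 99, (2, 3))

random.seed(1)
torch.manual_seed(1)
second = torch.randint(0, 99, (2, 3))

assert torch.equal(first, second)  # identical fixtures on every run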

    def create_and_check_xlnet_base_model(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetModel(config)
        model.to(torch_device)
        model.eval()

        _, _ = model(input_ids_1, input_mask=input_mask)
        _, _ = model(input_ids_1, attention_mask=input_mask)
        _, _ = model(input_ids_1, token_type_ids=segment_ids)
        outputs, mems_1 = model(input_ids_1)

        result = {
            "mems_1": mems_1,
            "outputs": outputs,
        }

        config.mem_len = 0
        model = XLNetModel(config)
        model.to(torch_device)
        model.eval()
        no_mems_outputs = model(input_ids_1)
        self.parent.assertEqual(len(no_mems_outputs), 1)

        self.parent.assertListEqual(
            list(result["outputs"].size()), [self.batch_size, self.seq_length, self.hidden_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
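The same shape check, run standalone with this tester's config values (a sketch; it assumes the tuple-returning transformers API this diff targets, where XLNetModel yields (hidden_states, mems) when mem_len > 0):

import torch
from transformers import XLNetConfig, XLNetModel

config = XLNetConfig(vocab_size=99, d_model=32, n_head=4, d_inner=128, n_layer=5, mem_len=10)
model = XLNetModel(config)
model.eval()

input_ids = torch.randint(0, 99, (14, 7))  # (batch_size, seq_length)
outputs, mems = model(input_ids)
assert list(outputs.size()) == [14, 7, 32]           # (batch, seq, hidden)
assert len(mems) == 5                                # one cache per layer
assert list(mems[0].size()) == [7, 14, 32]           # (seq, batch, hidden)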

    def create_and_check_xlnet_base_model_with_att_output(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetModel(config)
        model.to(torch_device)
        model.eval()

        _, _, attentions = model(input_ids_1, target_mapping=target_mapping, output_attentions=True)

        self.parent.assertEqual(len(attentions), config.n_layer)
        self.parent.assertIsInstance(attentions[0], tuple)
        self.parent.assertEqual(len(attentions[0]), 2)
        self.parent.assertTrue(attentions[0][0].shape, attentions[0][0].shape)
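The pair asserted per layer here is consistent with XLNet's two-stream attention: with target_mapping supplied, each layer reports attention weights for both the content stream and the query stream, hence a 2-tuple instead of a single tensor.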

    def create_and_check_xlnet_lm_head(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetLMHeadModel(config)
        model.to(torch_device)
        model.eval()

        loss_1, all_logits_1, mems_1 = model(input_ids_1, token_type_ids=segment_ids, labels=lm_labels)

        loss_2, all_logits_2, mems_2 = model(input_ids_2, token_type_ids=segment_ids, labels=lm_labels, mems=mems_1)

        logits, _ = model(input_ids_q, perm_mask=perm_mask, target_mapping=target_mapping)

        result = {
            "loss_1": loss_1,
            "mems_1": mems_1,
            "all_logits_1": all_logits_1,
            "loss_2": loss_2,
            "mems_2": mems_2,
            "all_logits_2": all_logits_2,
        }

        self.parent.assertListEqual(list(result["loss_1"].size()), [])
        self.parent.assertListEqual(
            list(result["all_logits_1"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

        self.parent.assertListEqual(list(result["loss_2"].size()), [])
        self.parent.assertListEqual(
            list(result["all_logits_2"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_2"]),
            [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
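Why mems_2 is asserted at mem_len while mems_1 is at seq_length: the second forward pass concatenates the cached states with the new segment's states and keeps only the newest mem_len entries. The bookkeeping, with this tester's numbers (a sketch of the cache-update rule, not the model code):

seq_length, mem_len = 7, 10
cached = seq_length                          # after the first segment: 7
cached = min(cached + seq_length, mem_len)   # after the second: min(14, 10)
assert cached == 10                          # matches the mems_2 assertion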

    def create_and_check_xlnet_qa(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetForQuestionAnswering(config)
        model.to(torch_device)
        model.eval()

        outputs = model(input_ids_1)
        (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits, mems,) = outputs

        outputs = model(
            input_ids_1,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
            cls_index=sequence_labels,
            is_impossible=is_impossible_labels,
            p_mask=input_mask,
        )

        outputs = model(
            input_ids_1,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
            cls_index=sequence_labels,
            is_impossible=is_impossible_labels,
        )

        total_loss, mems = outputs

        outputs = model(input_ids_1, start_positions=sequence_labels, end_positions=sequence_labels,)

        total_loss, mems = outputs

        result = {
            "loss": total_loss,
            "start_top_log_probs": start_top_log_probs,
            "start_top_index": start_top_index,
            "end_top_log_probs": end_top_log_probs,
            "end_top_index": end_top_index,
            "cls_logits": cls_logits,
            "mems": mems,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["start_top_log_probs"].size()), [self.batch_size, model.config.start_n_top],
        )
        self.parent.assertListEqual(
            list(result["start_top_index"].size()), [self.batch_size, model.config.start_n_top],
        )
        self.parent.assertListEqual(
            list(result["end_top_log_probs"].size()),
            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
        )
        self.parent.assertListEqual(
            list(result["end_top_index"].size()),
            [self.batch_size, model.config.start_n_top * model.config.end_n_top],
        )
        self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
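Beam bookkeeping behind those QA assertions (a sketch; 5 and 5 are XLNetConfig's default start_n_top and end_n_top, which this tester does not override):

start_n_top, end_n_top, batch_size = 5, 5, 14
# Start scores keep the top-k start positions; end scores are enumerated per
# (start, end) candidate pair, which is why the second axis multiplies out.
start_scores_width = start_n_top                 # 5 per example
end_scores_width = start_n_top * end_n_top       # 25 per example
assert end_scores_width == 25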

    def create_and_check_xlnet_token_classif(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetForTokenClassification(config)
        model.to(torch_device)
        model.eval()

        logits, mems_1 = model(input_ids_1)
        loss, logits, mems_1 = model(input_ids_1, labels=token_labels)

        result = {
            "loss": loss,
            "mems_1": mems_1,
            "logits": logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["logits"].size()), [self.batch_size, self.seq_length, self.type_sequence_label_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )

    def create_and_check_xlnet_sequence_classif(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetForSequenceClassification(config)
        model.to(torch_device)
        model.eval()

        logits, mems_1 = model(input_ids_1)
        loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels)

        result = {
            "loss": loss,
            "mems_1": mems_1,
            "logits": logits,
        }

        self.parent.assertListEqual(list(result["loss"].size()), [])
        self.parent.assertListEqual(
            list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size],
        )
        self.parent.assertListEqual(
            list(list(mem.size()) for mem in result["mems_1"]),
            [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
        )
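Contrast with the token-level head above, in a one-line shape sketch using this tester's values: the sequence head pools over tokens, so its logits drop the seq_length axis.

batch_size, seq_length, num_labels = 14, 7, 2
token_logits_shape = [batch_size, seq_length, num_labels]  # one row per token
sequence_logits_shape = [batch_size, num_labels]           # one row per sequence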

@@ -430,50 +443,33 @@ class XLNetModelTest(ModelTesterMixin, unittest.TestCase):

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids_1,
            input_ids_2,
            input_ids_q,
            perm_mask,
            input_mask,
            target_mapping,
            segment_ids,
            lm_labels,
            sequence_labels,
            is_impossible_labels,
            token_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids_1}
        return config, inputs_dict
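How the shared tests typically consume this (config, inputs_dict) pair (a sketch of the ModelTesterMixin pattern only; run_common_shape_test and its body are illustrative names, not the mixin's actual code):

def run_common_shape_test(self):
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    for model_class in self.all_model_classes:
        # The same minimal inputs_dict drives every registered model class.
        model = model_class(config)
        model.to(torch_device)
        model.eval()
        outputs = model(**inputs_dict)
        self.assertIsNotNone(outputs)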

@require_torch
class XLNetModelTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (
        (
            XLNetModel,
            XLNetLMHeadModel,
            XLNetForTokenClassification,
            XLNetForSequenceClassification,
            XLNetForQuestionAnswering,
            XLNetForMultipleChoice,
        )
        if is_torch_available()
        else ()
    )
    all_generative_model_classes = (
        (XLNetLMHeadModel,) if is_torch_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable
    test_pruning = False

    def setUp(self):
        self.model_tester = XLNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)

    def test_config(self):
        ...