"examples/vscode:/vscode.git/clone" did not exist on "b83bdce42abb60e72314bb8507c710e6f649dfb1"
Commit 7d5c47aa authored by Hongkun Yu, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 330982547
parent b792005c
@@ -406,8 +406,6 @@ class MobileBERTEncoder(tf.keras.Model):
                num_feedforward_networks=4,
                normalization_type='no_norm',
                classifier_activation=False,
-               return_all_layers=False,
-               return_attention_score=False,
                **kwargs):
     """Class initialization.
@@ -438,8 +436,6 @@ class MobileBERTEncoder(tf.keras.Model):
         MobileBERT paper. 'layer_norm' is used for the teacher model.
       classifier_activation: If using the tanh activation for the final
         representation of the [CLS] token in fine-tuning.
-      return_all_layers: If return all layer outputs.
-      return_attention_score: If return attention scores for each layer.
       **kwargs: Other keyworded and arguments.
     """
     self._self_setattr_tracking = False
@@ -513,12 +509,11 @@ class MobileBERTEncoder(tf.keras.Model):
     else:
       self._pooler_layer = None
-    if return_all_layers:
-      outputs = [all_layer_outputs, first_token]
-    else:
-      outputs = [prev_output, first_token]
-    if return_attention_score:
-      outputs.append(all_attention_scores)
+    outputs = dict(
+        sequence_output=prev_output,
+        pooled_output=first_token,
+        encoder_outputs=all_layer_outputs,
+        attention_scores=all_attention_scores)
     super(MobileBERTEncoder, self).__init__(
         inputs=self.inputs, outputs=outputs, **kwargs)
...
@@ -32,7 +32,7 @@ def generate_fake_input(batch_size=1, seq_len=5, vocab_size=10000, seed=0):
   return fake_input

-class ModelingTest(parameterized.TestCase, tf.test.TestCase):
+class MobileBertEncoderTest(parameterized.TestCase, tf.test.TestCase):

   def test_embedding_layer_with_token_type(self):
     layer = mobile_bert_encoder.MobileBertEmbedding(10, 8, 2, 16)
@@ -116,7 +116,9 @@ class ModelingTest(parameterized.TestCase, tf.test.TestCase):
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
-    layer_output, pooler_output = test_network([word_ids, mask, type_ids])
+    outputs = test_network([word_ids, mask, type_ids])
+    layer_output, pooler_output = outputs['sequence_output'], outputs[
+        'pooled_output']
     self.assertIsInstance(test_network.transformer_layers, list)
     self.assertLen(test_network.transformer_layers, num_blocks)
@@ -134,13 +136,13 @@ class ModelingTest(parameterized.TestCase, tf.test.TestCase):
     test_network = mobile_bert_encoder.MobileBERTEncoder(
         word_vocab_size=100,
         hidden_size=hidden_size,
-        num_blocks=num_blocks,
-        return_all_layers=True)
+        num_blocks=num_blocks)
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
-    all_layer_output, _ = test_network([word_ids, mask, type_ids])
+    outputs = test_network([word_ids, mask, type_ids])
+    all_layer_output = outputs['encoder_outputs']
     self.assertIsInstance(all_layer_output, list)
     self.assertLen(all_layer_output, num_blocks + 1)
@@ -153,16 +155,13 @@ class ModelingTest(parameterized.TestCase, tf.test.TestCase):
     test_network = mobile_bert_encoder.MobileBERTEncoder(
         word_vocab_size=vocab_size,
         hidden_size=hidden_size,
-        num_blocks=num_blocks,
-        return_all_layers=False)
+        num_blocks=num_blocks)
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
-    layer_out_tensor, pooler_out_tensor = test_network(
-        [word_ids, mask, type_ids])
-    model = tf.keras.Model([word_ids, mask, type_ids],
-                           [layer_out_tensor, pooler_out_tensor])
+    outputs = test_network([word_ids, mask, type_ids])
+    model = tf.keras.Model([word_ids, mask, type_ids], outputs)
     input_seq = generate_fake_input(
         batch_size=1, seq_len=sequence_length, vocab_size=vocab_size)
@@ -170,13 +169,12 @@ class ModelingTest(parameterized.TestCase, tf.test.TestCase):
         batch_size=1, seq_len=sequence_length, vocab_size=2)
     token_type = generate_fake_input(
         batch_size=1, seq_len=sequence_length, vocab_size=2)
-    layer_output, pooler_output = model.predict(
-        [input_seq, input_mask, token_type])
-    layer_output_shape = [1, sequence_length, hidden_size]
-    self.assertAllEqual(layer_output.shape, layer_output_shape)
-    pooler_output_shape = [1, hidden_size]
-    self.assertAllEqual(pooler_output.shape, pooler_output_shape)
+    outputs = model.predict([input_seq, input_mask, token_type])
+    sequence_output_shape = [1, sequence_length, hidden_size]
+    self.assertAllEqual(outputs['sequence_output'].shape, sequence_output_shape)
+    pooled_output_shape = [1, hidden_size]
+    self.assertAllEqual(outputs['pooled_output'].shape, pooled_output_shape)

   def test_mobilebert_encoder_invocation_with_attention_score(self):
     vocab_size = 100
@@ -186,18 +184,13 @@ class ModelingTest(parameterized.TestCase, tf.test.TestCase):
     test_network = mobile_bert_encoder.MobileBERTEncoder(
         word_vocab_size=vocab_size,
         hidden_size=hidden_size,
-        num_blocks=num_blocks,
-        return_all_layers=False,
-        return_attention_score=True)
+        num_blocks=num_blocks)
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
-    layer_out_tensor, pooler_out_tensor, attention_out_tensor = test_network(
-        [word_ids, mask, type_ids])
-    model = tf.keras.Model(
-        [word_ids, mask, type_ids],
-        [layer_out_tensor, pooler_out_tensor, attention_out_tensor])
+    outputs = test_network([word_ids, mask, type_ids])
+    model = tf.keras.Model([word_ids, mask, type_ids], outputs)
     input_seq = generate_fake_input(
         batch_size=1, seq_len=sequence_length, vocab_size=vocab_size)
@@ -205,9 +198,8 @@ class ModelingTest(parameterized.TestCase, tf.test.TestCase):
         batch_size=1, seq_len=sequence_length, vocab_size=2)
     token_type = generate_fake_input(
         batch_size=1, seq_len=sequence_length, vocab_size=2)
-    _, _, attention_score_output = model.predict(
-        [input_seq, input_mask, token_type])
-    self.assertLen(attention_score_output, num_blocks)
+    outputs = model.predict([input_seq, input_mask, token_type])
+    self.assertLen(outputs['attention_scores'], num_blocks)

   @parameterized.named_parameters(
       ('sequence_classification', models.BertClassifier, [None, 5]),
...
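Note on the new call pattern: after this change MobileBERTEncoder always returns a dictionary of outputs, so callers no longer toggle return_all_layers or return_attention_score and instead pick the tensors they need by key. Below is a minimal sketch of that usage, assuming the constructor arguments shown in the tests above (word_vocab_size, hidden_size, num_blocks) and the output keys introduced by this diff; the import path is an assumption for illustration, not part of this commit.

import tensorflow as tf
# Assumed import path; the tests above only reference the module name
# `mobile_bert_encoder`.
from official.nlp.modeling.networks import mobile_bert_encoder

# Build a small encoder using only the arguments visible in the tests.
encoder = mobile_bert_encoder.MobileBERTEncoder(
    word_vocab_size=100, hidden_size=32, num_blocks=2)

seq_len = 5
word_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
mask = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
type_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)

# The encoder now returns a dict keyed by output name.
outputs = encoder([word_ids, mask, type_ids])
sequence_output = outputs['sequence_output']    # [batch, seq_len, hidden_size]
pooled_output = outputs['pooled_output']        # [batch, hidden_size]
all_layer_outputs = outputs['encoder_outputs']  # list of length num_blocks + 1
attention_scores = outputs['attention_scores']  # list of length num_blocks

# The whole dict can also be passed straight to tf.keras.Model, as the
# updated tests do:
model = tf.keras.Model([word_ids, mask, type_ids], outputs)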