"...git@developer.sourcefind.cn:OpenDAS/mmdetection3d.git" did not exist on "ccd3047a1d62048cc5707e60181b2ab586b8e479"
Commit 2a5c349d authored by Chen Chen, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 333784610
parent 07a07f6a
@@ -40,8 +40,7 @@ class AlbertConfig(configs.BertConfig):
     super(AlbertConfig, self).__init__(**kwargs)
     # TODO(chendouble): 'inner_group_num' and 'num_hidden_groups' are always 1
-    # in the released ALBERT. Support other values in AlbertTransformerEncoder
-    # if needed.
+    # in the released ALBERT. Support other values in AlbertEncoder if needed.
     if inner_group_num != 1 or num_hidden_groups != 1:
       raise ValueError("We only support 'inner_group_num' and "
                        "'num_hidden_groups' as 1.")
...
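The guard above keeps the released-ALBERT restriction that both 'inner_group_num' and 'num_hidden_groups' equal 1. A minimal sketch of what that means for callers; the import path and the BertConfig-style keyword values are assumptions, not taken from this diff:

from official.nlp.albert import configs as albert_configs  # import path assumed

# The default grouping (1 and 1) is accepted.
config = albert_configs.AlbertConfig(
    vocab_size=30000,   # illustrative BertConfig-style values
    hidden_size=768,
    embedding_size=128)

# Any other grouping is rejected, matching the ValueError in the hunk above.
try:
  albert_configs.AlbertConfig(vocab_size=30000, inner_group_num=2)
except ValueError as err:
  print(err)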
@@ -15,7 +15,7 @@
 """A converter from a tf1 ALBERT encoder checkpoint to a tf2 encoder checkpoint.

 The conversion will yield an object-oriented checkpoint that can be used
-to restore a AlbertTransformerEncoder object.
+to restore an AlbertEncoder object.
 """
 from __future__ import absolute_import
 from __future__ import division
@@ -81,7 +81,7 @@ def _create_albert_model(cfg):
   Returns:
     A keras model.
   """
-  albert_encoder = networks.AlbertTransformerEncoder(
+  albert_encoder = networks.AlbertEncoder(
       vocab_size=cfg.vocab_size,
       hidden_size=cfg.hidden_size,
       embedding_width=cfg.embedding_size,
...
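Per the converter docstring, the result is an object-oriented (TF2) checkpoint that an AlbertEncoder can restore. A hedged sketch of the restore side; the checkpoint attribute name (model=), the checkpoint path, and the shape values are assumptions, not taken from this diff:

import tensorflow as tf
from official.nlp.modeling import networks

# Rebuild the encoder with the same shape parameters the converter used
# (in _create_albert_model these come from cfg).
albert_encoder = networks.AlbertEncoder(
    vocab_size=30000,        # illustrative values
    hidden_size=768,
    embedding_width=128,
    num_layers=12,
    num_attention_heads=12)

# Restore the converted object-oriented checkpoint into the encoder.
status = tf.train.Checkpoint(model=albert_encoder).restore("/path/to/converted_ckpt")
status.assert_existing_objects_matched()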
@@ -167,7 +167,7 @@ def get_transformer_encoder(bert_config,
       initializer=tf.keras.initializers.TruncatedNormal(
           stddev=bert_config.initializer_range))
   if isinstance(bert_config, albert_configs.AlbertConfig):
-    return networks.AlbertTransformerEncoder(**kwargs)
+    return networks.AlbertEncoder(**kwargs)
   else:
     assert isinstance(bert_config, configs.BertConfig)
     kwargs['output_range'] = output_range
...
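get_transformer_encoder dispatches on the concrete config class, so an AlbertConfig now yields an AlbertEncoder while a plain BertConfig yields a BertEncoder. A small illustrative helper restating that dispatch (the function below is not part of the codebase, and the import paths are assumed):

from official.nlp.albert import configs as albert_configs  # paths assumed
from official.nlp.bert import configs
from official.nlp.modeling import networks

def encoder_class_for(bert_config):
  """Illustrative only: mirrors the isinstance dispatch in the hunk above."""
  if isinstance(bert_config, albert_configs.AlbertConfig):
    return networks.AlbertEncoder
  assert isinstance(bert_config, configs.BertConfig)
  return networks.BertEncoder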
@@ -149,7 +149,7 @@ class EncoderConfig(hyperparams.OneOfConfig):
 ENCODER_CLS = {
     "bert": networks.BertEncoder,
     "mobilebert": networks.MobileBERTEncoder,
-    "albert": networks.AlbertTransformerEncoder,
+    "albert": networks.AlbertEncoder,
     "bigbird": bigbird_encoder.BigBirdEncoder,
 }
...
@@ -10,7 +10,7 @@ Transformer-based encoder as described in ["BERT: Pre-training of Deep
 Bidirectional Transformers for Language Understanding"](https://arxiv.org/abs/1810.04805). It includes the embedding lookups,
 transformer layers and pooling layer.

-* [`AlbertTransformerEncoder`](albert_transformer_encoder.py) implements a
+* [`AlbertEncoder`](albert_encoder.py) implements a
   Transformer-encoder described in the paper ["ALBERT: A Lite BERT for
   Self-supervised Learning of Language Representations"]
   (https://arxiv.org/abs/1909.11942). Compared with [BERT](https://arxiv.org/abs/1810.04805), ALBERT refactorizes embedding parameters
...
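The README bullet describes AlbertEncoder as the ALBERT Transformer encoder with factorized embeddings, i.e. token embeddings of width embedding_width projected up to hidden_size. A minimal construction sketch with illustrative sizes (not values from this diff):

import tensorflow as tf
from official.nlp.modeling import networks

# embedding_width < hidden_size is the factorized-embedding setup ALBERT uses.
encoder = networks.AlbertEncoder(
    vocab_size=30000,
    embedding_width=128,
    hidden_size=768,
    num_layers=12,
    num_attention_heads=12)

seq_len = 64
word_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
mask = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
type_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32)
# With the default list outputs, the encoder returns the sequence and pooled tensors.
sequence_output, pooled_output = encoder([word_ids, mask, type_ids])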
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Networks package definition."""
-from official.nlp.modeling.networks.albert_transformer_encoder import AlbertTransformerEncoder
+from official.nlp.modeling.networks.albert_encoder import AlbertEncoder
 from official.nlp.modeling.networks.bert_encoder import BertEncoder
 from official.nlp.modeling.networks.classification import Classification
 from official.nlp.modeling.networks.encoder_scaffold import EncoderScaffold
...
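After this change the class is exported from the networks package under its new name; both import forms below follow directly from the updated import line above and the test import further down:

from official.nlp.modeling import networks
from official.nlp.modeling.networks.albert_encoder import AlbertEncoder

# The package-level attribute and the module-level class are the same object.
assert networks.AlbertEncoder is AlbertEncoder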
@@ -23,7 +23,7 @@ from official.nlp.modeling import layers

 @tf.keras.utils.register_keras_serializable(package='Text')
-class AlbertTransformerEncoder(tf.keras.Model):
+class AlbertEncoder(tf.keras.Model):
   """ALBERT (https://arxiv.org/abs/1810.04805) text encoder network.

   This network implements the encoder described in the paper "ALBERT: A Lite
@@ -158,8 +158,10 @@ class AlbertTransformerEncoder(tf.keras.Model):
         attention_dropout=attention_dropout_rate,
         kernel_initializer=initializer,
         name='transformer')
+    encoder_outputs = []
     for _ in range(num_layers):
       data = shared_layer([data, attention_mask])
+      encoder_outputs.append(data)

     first_token_tensor = (
         tf.keras.layers.Lambda(lambda x: tf.squeeze(x[:, 0:1, :], axis=1))(data)
@@ -173,12 +175,13 @@ class AlbertTransformerEncoder(tf.keras.Model):
     if dict_outputs:
       outputs = dict(
           sequence_output=data,
+          encoder_outputs=encoder_outputs,
           pooled_output=cls_output,
       )
     else:
       outputs = [data, cls_output]

-    super(AlbertTransformerEncoder, self).__init__(
+    super(AlbertEncoder, self).__init__(
         inputs=[word_ids, mask, type_ids], outputs=outputs, **kwargs)

   def get_embedding_table(self):
...
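The two added lines collect the activation after every pass through the single shared transformer layer, so encoder_outputs ends up with num_layers entries even though the weights are shared. A stand-alone sketch of that sharing-and-collecting pattern, using an ordinary Keras layer as a stand-in for the shared transformer block:

import tensorflow as tf

num_layers = 3
inputs = tf.keras.Input(shape=(16, 8))

# One layer instance reused for every "layer" of the stack: ALBERT-style
# cross-layer parameter sharing.
shared_block = tf.keras.layers.Dense(8, activation="relu", name="shared_block")

data = inputs
encoder_outputs = []
for _ in range(num_layers):
  data = shared_block(data)
  encoder_outputs.append(data)  # one entry per pass, all produced by the same weights

model = tf.keras.Model(inputs=inputs, outputs=encoder_outputs)
assert len(model.outputs) == num_layers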
@@ -23,16 +23,16 @@ import numpy as np
 import tensorflow as tf

 from tensorflow.python.keras import keras_parameterized  # pylint: disable=g-direct-tensorflow-import
-from official.nlp.modeling.networks import albert_transformer_encoder
+from official.nlp.modeling.networks import albert_encoder


 # This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It
 # guarantees forward compatibility of this code for the V2 switchover.
 @keras_parameterized.run_all_keras_modes
-class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
+class AlbertEncoderTest(keras_parameterized.TestCase):

   def tearDown(self):
-    super(AlbertTransformerEncoderTest, self).tearDown()
+    super(AlbertEncoderTest, self).tearDown()
     tf.keras.mixed_precision.experimental.set_policy("float32")

   @parameterized.named_parameters(
@@ -52,7 +52,7 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
     tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
     # Create a small TransformerEncoder for testing.
-    test_network = albert_transformer_encoder.AlbertTransformerEncoder(**kwargs)
+    test_network = albert_encoder.AlbertEncoder(**kwargs)

     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
@@ -84,13 +84,14 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
     sequence_length = 21
     vocab_size = 57
     num_types = 7
+    num_layers = 3
     # Create a small TransformerEncoder for testing.
-    test_network = albert_transformer_encoder.AlbertTransformerEncoder(
+    test_network = albert_encoder.AlbertEncoder(
         vocab_size=vocab_size,
         embedding_width=8,
         hidden_size=hidden_size,
         num_attention_heads=2,
-        num_layers=3,
+        num_layers=num_layers,
         type_vocab_size=num_types)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
@@ -113,25 +114,25 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
     # Creates a TransformerEncoder with max_sequence_length != sequence_length
     max_sequence_length = 128
-    test_network = albert_transformer_encoder.AlbertTransformerEncoder(
+    test_network = albert_encoder.AlbertEncoder(
         vocab_size=vocab_size,
         embedding_width=8,
         hidden_size=hidden_size,
         max_sequence_length=max_sequence_length,
         num_attention_heads=2,
-        num_layers=3,
+        num_layers=num_layers,
         type_vocab_size=num_types)
     model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled])
     _ = model.predict([word_id_data, mask_data, type_id_data])

     # Tests dictionary outputs.
-    test_network_dict = albert_transformer_encoder.AlbertTransformerEncoder(
+    test_network_dict = albert_encoder.AlbertEncoder(
         vocab_size=vocab_size,
         embedding_width=8,
         hidden_size=hidden_size,
         max_sequence_length=max_sequence_length,
         num_attention_heads=2,
-        num_layers=3,
+        num_layers=num_layers,
         type_vocab_size=num_types,
         dict_outputs=True)
     _ = test_network_dict([word_ids, mask, type_ids])
@@ -144,6 +145,7 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
             input_type_ids=type_id_data))
     self.assertAllEqual(list_outputs[0], dict_outputs["sequence_output"])
     self.assertAllEqual(list_outputs[1], dict_outputs["pooled_output"])
+    self.assertLen(dict_outputs["encoder_outputs"], num_layers)
   def test_serialize_deserialize(self):
     tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
@@ -161,7 +163,7 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
         dropout_rate=0.05,
         attention_dropout_rate=0.22,
         initializer="glorot_uniform")
-    network = albert_transformer_encoder.AlbertTransformerEncoder(**kwargs)
+    network = albert_encoder.AlbertEncoder(**kwargs)

     expected_config = dict(kwargs)
     expected_config["activation"] = tf.keras.activations.serialize(
@@ -172,7 +174,7 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
     # Create another network object from the first object's config.
     new_network = (
-        albert_transformer_encoder.AlbertTransformerEncoder.from_config(
+        albert_encoder.AlbertEncoder.from_config(
             network.get_config()))

     # Validate that the config can be forced to JSON.
...
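Taken together, the test changes check that with dict_outputs=True the encoder returns sequence_output, pooled_output, and one encoder_outputs entry per (shared) layer. A short usage sketch along the same lines; hidden_size=32 and the batch size are illustrative assumptions rather than the test's values:

import numpy as np
from official.nlp.modeling import networks

num_layers = 3
seq_len = 21
encoder = networks.AlbertEncoder(
    vocab_size=57,
    embedding_width=8,
    hidden_size=32,
    num_attention_heads=2,
    num_layers=num_layers,
    type_vocab_size=7,
    dict_outputs=True)

word_ids = np.random.randint(57, size=(2, seq_len)).astype("int32")
mask = np.ones((2, seq_len), dtype="int32")
type_ids = np.random.randint(7, size=(2, seq_len)).astype("int32")

outputs = encoder([word_ids, mask, type_ids])
# One per-layer activation per pass through the shared transformer layer.
assert len(outputs["encoder_outputs"]) == num_layers
print(outputs["sequence_output"].shape, outputs["pooled_output"].shape)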