"...python/git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "bde24ab31f89f56516616ba40df074fd1117679e"
Commit 869a4806 authored by Hongkun Yu, committed by A. Unique TensorFlower

Creates configs folder inside nlp/

PiperOrigin-RevId: 314634299
parent b2e422b0
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A multi-head BERT encoder network for pretraining."""
from typing import List, Optional, Text
import dataclasses
import tensorflow as tf
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
from official.nlp.configs import encoders
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_pretrainer
@dataclasses.dataclass
class ClsHeadConfig(base_config.Config):
  inner_dim: int = 0
  num_classes: int = 2
  activation: Optional[Text] = "tanh"
  dropout_rate: float = 0.0
  cls_token_idx: int = 0
  name: Optional[Text] = None


@dataclasses.dataclass
class BertPretrainerConfig(base_config.Config):
  """BERT pretrainer configuration."""
  num_masked_tokens: int = 76
  encoder: encoders.TransformerEncoderConfig = (
      encoders.TransformerEncoderConfig())
  cls_heads: List[ClsHeadConfig] = dataclasses.field(default_factory=list)
def instantiate_from_cfg(
    config: BertPretrainerConfig,
    encoder_network: Optional[tf.keras.layers.Layer] = None):
  """Instantiates a BertPretrainer from the config."""
  # `encoder_cfg` is read unconditionally because it also supplies the
  # initializer range for the masked LM head below.
  encoder_cfg = config.encoder
  if encoder_network is None:
    encoder_network = networks.TransformerEncoder(
        vocab_size=encoder_cfg.vocab_size,
        hidden_size=encoder_cfg.hidden_size,
        num_layers=encoder_cfg.num_layers,
        num_attention_heads=encoder_cfg.num_attention_heads,
        intermediate_size=encoder_cfg.intermediate_size,
        activation=tf_utils.get_activation(encoder_cfg.hidden_activation),
        dropout_rate=encoder_cfg.dropout_rate,
        attention_dropout_rate=encoder_cfg.attention_dropout_rate,
        max_sequence_length=encoder_cfg.max_position_embeddings,
        type_vocab_size=encoder_cfg.type_vocab_size,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=encoder_cfg.initializer_range))
  if config.cls_heads:
    classification_heads = [
        layers.ClassificationHead(**cfg.as_dict()) for cfg in config.cls_heads
    ]
  else:
    classification_heads = []
  return bert_pretrainer.BertPretrainerV2(
      config.num_masked_tokens,
      mlm_initializer=tf.keras.initializers.TruncatedNormal(
          stddev=encoder_cfg.initializer_range),
      encoder_network=encoder_network,
      classification_heads=classification_heads)
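For orientation, a minimal usage sketch (not part of the commit): it mirrors the test file below, using illustrative values and the `official.nlp.configs` import path that the tests use.

# Sketch only: wiring the configs above into a BertPretrainerV2 instance.
from official.nlp.configs import bert
from official.nlp.configs import encoders

config = bert.BertPretrainerConfig(
    encoder=encoders.TransformerEncoderConfig(vocab_size=10, num_layers=1),
    cls_heads=[
        bert.ClsHeadConfig(inner_dim=10, num_classes=2, name="next_sentence")
    ])
pretrainer = bert.instantiate_from_cfg(config)
# The resulting model exposes named sub-objects (the encoder and each head's
# pooler) that can be checkpointed individually.
print(pretrainer.checkpoint_items.keys())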
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for BERT configurations and models instantiation."""
import tensorflow as tf
from official.nlp.configs import bert
from official.nlp.configs import encoders
class BertModelsTest(tf.test.TestCase):

  def test_network_invocation(self):
    config = bert.BertPretrainerConfig(
        encoder=encoders.TransformerEncoderConfig(vocab_size=10, num_layers=1))
    _ = bert.instantiate_from_cfg(config)

    # Invokes with classification heads.
    config = bert.BertPretrainerConfig(
        encoder=encoders.TransformerEncoderConfig(vocab_size=10, num_layers=1),
        cls_heads=[
            bert.ClsHeadConfig(
                inner_dim=10, num_classes=2, name="next_sentence")
        ])
    _ = bert.instantiate_from_cfg(config)

    with self.assertRaises(ValueError):
      config = bert.BertPretrainerConfig(
          encoder=encoders.TransformerEncoderConfig(
              vocab_size=10, num_layers=1),
          cls_heads=[
              bert.ClsHeadConfig(
                  inner_dim=10, num_classes=2, name="next_sentence"),
              bert.ClsHeadConfig(
                  inner_dim=10, num_classes=2, name="next_sentence")
          ])
      _ = bert.instantiate_from_cfg(config)

  def test_checkpoint_items(self):
    config = bert.BertPretrainerConfig(
        encoder=encoders.TransformerEncoderConfig(vocab_size=10, num_layers=1),
        cls_heads=[
            bert.ClsHeadConfig(
                inner_dim=10, num_classes=2, name="next_sentence")
        ])
    encoder = bert.instantiate_from_cfg(config)
    self.assertSameElements(encoder.checkpoint_items.keys(),
                            ["encoder", "next_sentence.pooler_dense"])


if __name__ == "__main__":
  tf.test.main()
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Configurations for Encoders."""
import dataclasses
from official.modeling.hyperparams import base_config
@dataclasses.dataclass
class TransformerEncoderConfig(base_config.Config):
  """BERT encoder configuration."""
  vocab_size: int = 30522
  hidden_size: int = 768
  num_layers: int = 12
  num_attention_heads: int = 12
  hidden_activation: str = "gelu"
  intermediate_size: int = 3072
  dropout_rate: float = 0.1
  attention_dropout_rate: float = 0.1
  max_position_embeddings: int = 512
  type_vocab_size: int = 2
  initializer_range: float = 0.02
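These defaults correspond to a BERT-base encoder. As an illustrative sketch (not part of the commit), a smaller encoder for quick experiments could be described by overriding individual fields:

# Sketch only: a reduced encoder configuration built from the defaults above.
from official.nlp.configs import encoders

small_encoder = encoders.TransformerEncoderConfig(
    hidden_size=256,
    num_layers=4,
    num_attention_heads=4,
    intermediate_size=1024)
# Config objects can be converted to plain dictionaries, as bert.py does with
# ClsHeadConfig via `as_dict()`.
print(small_encoder.as_dict())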
@@ -20,6 +20,9 @@ from __future__ import division
from __future__ import print_function
import copy
from typing import List, Optional
import gin
import tensorflow as tf
from official.nlp.modeling import networks
@@ -131,3 +134,90 @@ class BertPretrainer(tf.keras.Model):
  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)


# TODO(hongkuny): Migrate to BertPretrainerV2 for all usages.
@tf.keras.utils.register_keras_serializable(package='Text')
@gin.configurable
class BertPretrainerV2(tf.keras.Model):
  """BERT pretraining model V2 (experimental).

  Adds the masked language model head and optional classification heads on
  top of the transformer encoder. When num_masked_tokens == 0, no MaskedLM
  head is created.

  Arguments:
    num_masked_tokens: Number of tokens to predict from the masked LM.
    encoder_network: A transformer network. This network should output a
      sequence output and a classification output.
    mlm_initializer: The initializer (if any) to use in the masked LM. Defaults
      to a Glorot uniform initializer.
    classification_heads: An optional list of head layers applied to the
      encoder sequence output.
    name: The name of the model.

  Inputs: Inputs defined by the encoder network, plus `masked_lm_positions`.
  Outputs: A dictionary of `lm_output` and classification head outputs keyed
    by head names.
  """
  def __init__(
      self,
      num_masked_tokens: int,
      encoder_network: tf.keras.Model,
      mlm_initializer='glorot_uniform',
      classification_heads: Optional[List[tf.keras.layers.Layer]] = None,
      name: str = 'bert',
      **kwargs):
    self._self_setattr_tracking = False
    self._config = {
        'encoder_network': encoder_network,
        'num_masked_tokens': num_masked_tokens,
        'mlm_initializer': mlm_initializer,
        'classification_heads': classification_heads,
        'name': name,
    }

    self.encoder_network = encoder_network
    inputs = copy.copy(self.encoder_network.inputs)
    sequence_output, _ = self.encoder_network(inputs)

    self.classification_heads = classification_heads or []
    if len(set([cls.name for cls in self.classification_heads])) != len(
        self.classification_heads):
      raise ValueError('Classification heads should have unique names.')

    outputs = dict()
    if num_masked_tokens > 0:
      self.masked_lm = networks.MaskedLM(
          num_predictions=num_masked_tokens,
          input_width=sequence_output.shape[-1],
          source_network=self.encoder_network,
          initializer=mlm_initializer,
          name='masked_lm')
      masked_lm_positions = copy.copy(self.masked_lm.inputs[-1])
      inputs.append(masked_lm_positions)
      outputs['lm_output'] = self.masked_lm(
          [sequence_output, masked_lm_positions])

    for cls_head in self.classification_heads:
      outputs[cls_head.name] = cls_head(sequence_output)

    super(BertPretrainerV2, self).__init__(
        inputs=inputs, outputs=outputs, name=name, **kwargs)

  @property
  def checkpoint_items(self):
    """Returns a dictionary of items to be additionally checkpointed."""
    items = dict(encoder=self.encoder_network)
    for head in self.classification_heads:
      for key, item in head.checkpoint_items.items():
        items['.'.join([head.name, key])] = item
    return items

  def get_config(self):
    return self._config

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
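As a usage sketch (not part of this change), `checkpoint_items` is meant to feed a `tf.train.Checkpoint` so that downstream tasks can restore individual sub-objects; the constructor arguments mirror the tests below, and the checkpoint path is a hypothetical example.

# Sketch only: checkpointing the pretrainer's named sub-objects.
import tensorflow as tf
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_pretrainer

encoder = networks.TransformerEncoder(
    vocab_size=100, num_layers=2, sequence_length=5)
pretrainer = bert_pretrainer.BertPretrainerV2(
    encoder_network=encoder, num_masked_tokens=2)
# 'encoder' (and each classification head's pooler, when heads are present)
# becomes a top-level name in the checkpoint, so a fine-tuning job can restore
# only the encoder weights.
checkpoint = tf.train.Checkpoint(**pretrainer.checkpoint_items)
checkpoint.save('/tmp/bert_pretrainer_v2/ckpt')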
@@ -30,7 +30,7 @@ from official.nlp.modeling.models import bert_pretrainer
@keras_parameterized.run_all_keras_modes
class BertPretrainerTest(keras_parameterized.TestCase):

-  def test_bert_trainer(self):
+  def test_bert_pretrainer(self):
    """Validate that the Keras object can be created."""
    # Build a transformer network to use within the BERT trainer.
    vocab_size = 100
@@ -106,6 +106,56 @@ class BertPretrainerTest(keras_parameterized.TestCase):
    self.assertAllEqual(bert_trainer_model.get_config(),
                        new_bert_trainer_model.get_config())

  def test_bert_pretrainerv2(self):
    """Validate that the Keras object can be created."""
    # Build a transformer network to use within the BERT trainer.
    vocab_size = 100
    sequence_length = 512
    test_network = networks.TransformerEncoder(
        vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)

    # Create a BERT trainer with the created network.
    num_token_predictions = 2
    bert_trainer_model = bert_pretrainer.BertPretrainerV2(
        encoder_network=test_network, num_masked_tokens=num_token_predictions)

    # Create a set of 2-dimensional inputs (the first dimension is implicit).
    word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
    lm_mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)

    # Invoke the trainer model on the inputs. This causes the layer to be
    # built.
    outputs = bert_trainer_model([word_ids, mask, type_ids, lm_mask])

    # Validate that the outputs are of the expected shape.
    expected_lm_shape = [None, num_token_predictions, vocab_size]
    self.assertAllEqual(expected_lm_shape,
                        outputs['lm_output'].shape.as_list())

  def test_v2_serialize_deserialize(self):
    """Validate that the BERT trainer can be serialized and deserialized."""
    # Build a transformer network to use within the BERT trainer. (Here, we
    # use a short sequence_length for convenience.)
    test_network = networks.TransformerEncoder(
        vocab_size=100, num_layers=2, sequence_length=5)

    # Create a BERT trainer with the created network. (Note that all the args
    # are different, so we can catch any serialization mismatches.)
    bert_trainer_model = bert_pretrainer.BertPretrainerV2(
        encoder_network=test_network, num_masked_tokens=2)

    # Create another BERT trainer via serialization and deserialization.
    config = bert_trainer_model.get_config()
    new_bert_trainer_model = bert_pretrainer.BertPretrainerV2.from_config(
        config)

    # Validate that the config can be forced to JSON.
    _ = new_bert_trainer_model.to_json()

    # If the serialization was successful, the new config should match the
    # old.
    self.assertAllEqual(bert_trainer_model.get_config(),
                        new_bert_trainer_model.get_config())


if __name__ == '__main__':
  tf.test.main()