Add ELECTRA pretrainer v2 configuration and adjust the visibility of ElectraPretrainer.

Contributed by mickeystroller.
PiperOrigin-RevId: 319073522

Add ELECTRA pretrainer v2 configuration and adjust the visibility of ElectraPretrainer.
Contributed by mickeystroller.
PiperOrigin-RevId: 319073522
a36f85c6 · A. Unique TensorFlower · 0e57630c · a36f85c6 · a36f85c6 · a36f85c6
Commit a36f85c6 authored Jun 30, 2020 by A. Unique TensorFlower
3 changed files
--- a/official/nlp/configs/electra.py
+++ b/official/nlp/configs/electra.py
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""ELECTRA model configurations and instantiation methods."""
+from typing import List, Optional
+import dataclasses
+import tensorflow as tf
+from official.modeling import tf_utils
+from official.modeling.hyperparams import base_config
+from official.nlp.configs import bert
+from official.nlp.configs import encoders
+from official.nlp.modeling import layers
+from official.nlp.modeling.models import electra_pretrainer
+@dataclasses.dataclass
+class ELECTRAPretrainerConfig(base_config.Config):
+  """ELECTRA pretrainer configuration.
+
+  Attributes:
+    num_masked_tokens: Forwarded to the pretrainer as `num_token_predictions`.
+    sequence_length: Forwarded to the pretrainer as `sequence_length`.
+    num_classes: Forwarded to the pretrainer as `num_classes`.
+    discriminator_loss_weight: Weight for the discriminator loss. It is not
+      consumed in this module (presumably read by the training task; verify
+      against the caller).
+    generator_encoder: Encoder config for the generator network. Its
+      `vocab_size`, `hidden_size`, `hidden_activation` and `initializer_range`
+      are also used by `instantiate_pretrainer_from_cfg` to configure the
+      pretrainer.
+    discriminator_encoder: Encoder config for the discriminator network.
+    cls_heads: Configs of the classification heads to attach; empty by
+      default.
+  """
+  num_masked_tokens: int = 76
+  sequence_length: int = 512
+  num_classes: int = 2
+  discriminator_loss_weight: float = 50.0
+  # Build a fresh encoder config per instance. A config object used directly
+  # as the default would be created once and shared by every
+  # ELECTRAPretrainerConfig, and Python 3.11+ dataclasses reject unhashable
+  # defaults (assuming TransformerEncoderConfig is a regular, non-frozen
+  # dataclass).
+  generator_encoder: encoders.TransformerEncoderConfig = dataclasses.field(
+      default_factory=encoders.TransformerEncoderConfig)
+  discriminator_encoder: encoders.TransformerEncoderConfig = dataclasses.field(
+      default_factory=encoders.TransformerEncoderConfig)
+  cls_heads: List[bert.ClsHeadConfig] = dataclasses.field(default_factory=list)
+def instantiate_classification_heads_from_cfgs(
+    cls_head_configs: List[bert.ClsHeadConfig]
+) -> List[layers.ClassificationHead]:
+  """Builds one `ClassificationHead` layer per head config.
+
+  Args:
+    cls_head_configs: Head configs. Each one is expanded through `as_dict()`
+      into keyword arguments for `layers.ClassificationHead`.
+
+  Returns:
+    The classification heads in config order, or an empty list when
+    `cls_head_configs` is falsy (empty or None).
+  """
+  # Guard first: nothing to build for a falsy input.
+  if not cls_head_configs:
+    return []
+  return [
+      layers.ClassificationHead(**head_cfg.as_dict())
+      for head_cfg in cls_head_configs
+  ]
+def instantiate_pretrainer_from_cfg(
+    config: ELECTRAPretrainerConfig,
+    generator_network: Optional[tf.keras.Model] = None,
+    discriminator_network: Optional[tf.keras.Model] = None,
+    ) -> electra_pretrainer.ElectraPretrainer:
+  """Instantiates ElectraPretrainer from the config.
+
+  Args:
+    config: The pretrainer configuration.
+    generator_network: Optional pre-built generator. When None, one is
+      created from `config.generator_encoder`.
+    discriminator_network: Optional pre-built discriminator. When None, one
+      is created from `config.discriminator_encoder`.
+
+  Returns:
+    An `ElectraPretrainer` wrapping the two networks. The vocabulary size,
+    last hidden dimension, MLM activation and MLM initializer are all taken
+    from the generator encoder config.
+  """
+  gen_cfg = config.generator_encoder
+  if generator_network is None:
+    generator_network = encoders.instantiate_encoder_from_cfg(gen_cfg)
+  if discriminator_network is None:
+    discriminator_network = encoders.instantiate_encoder_from_cfg(
+        config.discriminator_encoder)
+  # The masked-LM settings come from the generator encoder config.
+  mlm_activation = tf_utils.get_activation(gen_cfg.hidden_activation)
+  mlm_initializer = tf.keras.initializers.TruncatedNormal(
+      stddev=gen_cfg.initializer_range)
+  heads = instantiate_classification_heads_from_cfgs(config.cls_heads)
+  return electra_pretrainer.ElectraPretrainer(
+      generator_network=generator_network,
+      discriminator_network=discriminator_network,
+      vocab_size=gen_cfg.vocab_size,
+      num_classes=config.num_classes,
+      sequence_length=config.sequence_length,
+      last_hidden_dim=gen_cfg.hidden_size,
+      num_token_predictions=config.num_masked_tokens,
+      mlm_activation=mlm_activation,
+      mlm_initializer=mlm_initializer,
+      classification_heads=heads)
--- a/official/nlp/configs/electra_test.py
+++ b/official/nlp/configs/electra_test.py
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for ELECTRA configurations and models instantiation."""
+import tensorflow as tf
+from official.nlp.configs import bert
+from official.nlp.configs import electra
+from official.nlp.configs import encoders
+class ELECTRAModelsTest(tf.test.TestCase):
+  """Checks that ELECTRA pretrainers can be instantiated from configs."""
+
+  def _make_config(self, **overrides):
+    """Returns a small pretrainer config; `overrides` set extra fields."""
+    return electra.ELECTRAPretrainerConfig(
+        generator_encoder=encoders.TransformerEncoderConfig(
+            vocab_size=10, num_layers=1),
+        discriminator_encoder=encoders.TransformerEncoderConfig(
+            vocab_size=10, num_layers=2),
+        **overrides)
+
+  def test_network_invocation(self):
+    """Builds the pretrainer without, then with, classification heads."""
+    _ = electra.instantiate_pretrainer_from_cfg(self._make_config())
+    # Invokes with classification heads.
+    next_sentence_head = bert.ClsHeadConfig(
+        inner_dim=10, num_classes=2, name="next_sentence")
+    config_with_heads = self._make_config(cls_heads=[next_sentence_head])
+    _ = electra.instantiate_pretrainer_from_cfg(config_with_heads)
+# Run the test suite only when this file is executed as a script.
+if __name__ == "__main__":
+  tf.test.main()
--- a/official/nlp/modeling/models/__init__.py
+++ b/official/nlp/modeling/models/__init__.py
@@ -17,3 +17,4 @@ from official.nlp.modeling.models.bert_classifier import BertClassifier
 from official.nlp.modeling.models.bert_pretrainer import BertPretrainer
 from official.nlp.modeling.models.bert_span_labeler import BertSpanLabeler
 from official.nlp.modeling.models.bert_token_classifier import BertTokenClassifier
+from official.nlp.modeling.models.electra_pretrainer import ElectraPretrainer