Merge branch 'master' of https://github.com/tensorflow/models into RTESuperGLUE

bb124157 · stephenwu · 2e9bb539 · 0edeb7f6 · bb124157 · bb124157
Commit bb124157 authored Mar 10, 2021 by stephenwu
20 changed files
--- a/official/nlp/data/tagging_dataloader_test.py
+++ b/official/nlp/data/tagging_dataloader_test.py
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Tests for official.nlp.data.tagging_data_loader."""
 import os


--- a/official/nlp/data/train_sentencepiece.py
+++ b/official/nlp/data/train_sentencepiece.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """A script to train sentencepiece model from tensorflow datasets.

 Reserved tokens:

--- a/official/nlp/data/wmt_dataloader.py
+++ b/official/nlp/data/wmt_dataloader.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Input pipeline for the transformer model to read, filter, and batch examples.

 Batching scheme

--- a/official/nlp/data/wmt_dataloader_test.py
+++ b/official/nlp/data/wmt_dataloader_test.py
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Tests for official.nlp.data.wmt_dataloader."""
 import os
 from absl.testing import parameterized

--- a/official/nlp/finetuning/binary_helper.py
+++ b/official/nlp/finetuning/binary_helper.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""The helper for finetuning binaries."""
+import json
+import math
+import sys
+from typing import Any, Dict, List, Optional
+
+from absl import logging
+import tensorflow as tf
+
+from official.core import config_definitions as cfg
+from official.modeling import hyperparams
+from official.nlp.configs import encoders
+from official.nlp.data import question_answering_dataloader
+from official.nlp.data import sentence_prediction_dataloader
+from official.nlp.data import tagging_dataloader
+from official.nlp.tasks import question_answering
+from official.nlp.tasks import sentence_prediction
+from official.nlp.tasks import tagging
+
+
+def override_trainer_cfg(trainer_cfg: cfg.TrainerConfig, learning_rate: float,
+                         num_epoch: int, global_batch_size: int,
+                         warmup_ratio: float, training_data_size: int,
+                         eval_data_size: int, num_eval_per_epoch: int,
+                         best_checkpoint_export_subdir: str,
+                         best_checkpoint_eval_metric: str,
+                         best_checkpoint_metric_comp: str):
+  """Overrides a `cfg.TrainerConfig` object.
+
+  Step counts are derived from the dataset sizes and the global batch size and
+  are passed to `trainer_cfg.override` together with an AdamW optimizer, a
+  polynomial learning-rate decay, a polynomial warmup and the best-checkpoint
+  settings.
+
+  Args:
+    trainer_cfg: The trainer config to override.
+    learning_rate: Initial learning rate of the polynomial decay schedule.
+    num_epoch: Number of training epochs.
+    global_batch_size: Global batch size used to convert examples into steps.
+    warmup_ratio: Fraction of the total training steps used for warmup.
+    training_data_size: Number of training examples.
+    eval_data_size: Number of evaluation examples. A falsy value sets
+      `validation_steps` to -1.
+    num_eval_per_epoch: Number of evaluations to run per training epoch.
+    best_checkpoint_export_subdir: Value for `best_checkpoint_export_subdir`.
+    best_checkpoint_eval_metric: Value for `best_checkpoint_eval_metric`.
+    best_checkpoint_metric_comp: Value for `best_checkpoint_metric_comp`.
+  """
+  steps_per_epoch = training_data_size // global_batch_size
+  total_train_steps = steps_per_epoch * num_epoch
+  # TODO(b/165081095): always set to -1 after the bug is resolved.
+  # A value of -1 exhausts the validation data.
+  validation_steps = (
+      int(math.ceil(eval_data_size / global_batch_size))
+      if eval_data_size else -1)
+  warmup_steps = int(total_train_steps * warmup_ratio)
+  validation_interval = steps_per_epoch // num_eval_per_epoch
+
+  learning_rate_cfg = {
+      'type': 'polynomial',
+      'polynomial': {
+          'decay_steps': total_train_steps,
+          'initial_learning_rate': learning_rate,
+          'end_learning_rate': 0,
+      }
+  }
+  warmup_cfg = {
+      'polynomial': {
+          'warmup_steps': warmup_steps,
+      },
+      'type': 'polynomial',
+  }
+  optimizer_cfg = {
+      'learning_rate': learning_rate_cfg,
+      'optimizer': {
+          'type': 'adamw',
+      },
+      'warmup': warmup_cfg,
+  }
+
+  trainer_cfg.override({
+      'optimizer_config': optimizer_cfg,
+      'train_steps': total_train_steps,
+      'validation_interval': validation_interval,
+      'validation_steps': validation_steps,
+      'best_checkpoint_export_subdir': best_checkpoint_export_subdir,
+      'best_checkpoint_eval_metric': best_checkpoint_eval_metric,
+      'best_checkpoint_metric_comp': best_checkpoint_metric_comp,
+  })
+
+
+def load_model_config_file(model_config_file: str) -> Dict[str, Any]:
+  """Loads bert config json file or `encoders.EncoderConfig` in yaml file.
+
+  The file is first applied strictly on top of a default
+  `encoders.EncoderConfig`. If that raises `KeyError`, the file is parsed as a
+  bert config json instead and converted to an encoder-config dict.
+
+  Args:
+    model_config_file: Path to the config file. May be empty.
+
+  Returns:
+    An empty dict when no file is given, otherwise the encoder config as a
+    dict.
+
+  Raises:
+    ValueError: If the json config contains both the new and the legacy
+      spelling of the same attribute.
+  """
+  # model_config_file may be empty when using tf.hub.
+  if not model_config_file:
+    return {}
+
+  try:
+    encoder_config = hyperparams.override_params_dict(
+        encoders.EncoderConfig(), model_config_file, is_strict=True)
+    logging.info('Load encoder_config yaml file from %s.', model_config_file)
+    return encoder_config.as_dict()
+  except KeyError:
+    # Fall through and read the file as a bert config json.
+    pass
+
+  logging.info('Load bert config json file from %s', model_config_file)
+  with tf.io.gfile.GFile(model_config_file, 'r') as reader:
+    config = json.loads(reader.read())
+
+  def pick(new_key, legacy_key):
+    """Returns the value stored under exactly one of the two keys."""
+    has_new_key = new_key in config
+    if has_new_key and legacy_key in config:
+      raise ValueError('Unexpected that both %s and %s are in config.' %
+                       (new_key, legacy_key))
+    if has_new_key:
+      return config[new_key]
+    return config[legacy_key]
+
+  # Support both legacy bert_config attributes and the new config attributes.
+  bert_config = {
+      'attention_dropout_rate':
+          pick('attention_dropout_rate', 'attention_probs_dropout_prob'),
+      'dropout_rate':
+          pick('dropout_rate', 'hidden_dropout_prob'),
+      'hidden_activation':
+          pick('hidden_activation', 'hidden_act'),
+      'hidden_size':
+          config['hidden_size'],
+      # Optional attribute: None when the json file does not define it.
+      'embedding_size':
+          config.get('embedding_size'),
+      'initializer_range':
+          config['initializer_range'],
+      'intermediate_size':
+          config['intermediate_size'],
+      'max_position_embeddings':
+          config['max_position_embeddings'],
+      'num_attention_heads':
+          config['num_attention_heads'],
+      'num_layers':
+          pick('num_layers', 'num_hidden_layers'),
+      'type_vocab_size':
+          config['type_vocab_size'],
+      'vocab_size':
+          config['vocab_size'],
+  }
+  return {'bert': bert_config}
+
+
+def override_sentence_prediction_task_config(
+    task_cfg: sentence_prediction.SentencePredictionConfig,
+    model_config_file: str,
+    init_checkpoint: str,
+    hub_module_url: str,
+    global_batch_size: int,
+    train_input_path: str,
+    validation_input_path: str,
+    seq_length: int,
+    num_classes: int,
+    metric_type: Optional[str] = 'accuracy',
+    label_type: Optional[str] = 'int'):
+  """Overrides a `SentencePredictionConfig` object.
+
+  Args:
+    task_cfg: The task config to override.
+    model_config_file: Encoder config file, read by `load_model_config_file`.
+    init_checkpoint: Value for `init_checkpoint`.
+    hub_module_url: Value for `hub_module_url`.
+    global_batch_size: Batch size for both the train and validation data.
+    train_input_path: Input path of the training data.
+    validation_input_path: Input path of the validation data.
+    seq_length: Sequence length for both the train and validation data.
+    num_classes: Value for `model.num_classes`.
+    metric_type: Value for `metric_type`, defaults to 'accuracy'.
+    label_type: Label type for both datasets, defaults to 'int'.
+  """
+  model_overrides = {
+      'num_classes': num_classes,
+      'encoder': load_model_config_file(model_config_file),
+  }
+  # Training drops the last partial batch; validation keeps it.
+  train_data_overrides = {
+      'drop_remainder': True,
+      'global_batch_size': global_batch_size,
+      'input_path': train_input_path,
+      'is_training': True,
+      'seq_length': seq_length,
+      'label_type': label_type,
+  }
+  validation_data_overrides = {
+      'drop_remainder': False,
+      'global_batch_size': global_batch_size,
+      'input_path': validation_input_path,
+      'is_training': False,
+      'seq_length': seq_length,
+      'label_type': label_type,
+  }
+  task_cfg.override({
+      'init_checkpoint': init_checkpoint,
+      'metric_type': metric_type,
+      'model': model_overrides,
+      'hub_module_url': hub_module_url,
+      'train_data': train_data_overrides,
+      'validation_data': validation_data_overrides,
+  })
+
+
+def override_qa_task_config(
+    task_cfg: question_answering.QuestionAnsweringConfig,
+    model_config_file: str, init_checkpoint: str, hub_module_url: str,
+    global_batch_size: int, train_input_path: str, validation_input_path: str,
+    seq_length: int, tokenization: str, vocab_file: str, do_lower_case: bool,
+    version_2_with_negative: bool):
+  """Overrides a `QuestionAnsweringConfig` object.
+
+  The tokenization-related arguments (`tokenization`, `vocab_file`,
+  `do_lower_case`, `version_2_with_negative`) are only applied to the
+  validation data config.
+  """
+  encoder_overrides = load_model_config_file(model_config_file)
+  train_data_overrides = {
+      'drop_remainder': True,
+      'global_batch_size': global_batch_size,
+      'input_path': train_input_path,
+      'is_training': True,
+      'seq_length': seq_length,
+  }
+  validation_data_overrides = {
+      'do_lower_case': do_lower_case,
+      'drop_remainder': False,
+      'global_batch_size': global_batch_size,
+      'input_path': validation_input_path,
+      'is_training': False,
+      'seq_length': seq_length,
+      'tokenization': tokenization,
+      'version_2_with_negative': version_2_with_negative,
+      'vocab_file': vocab_file,
+  }
+  task_cfg.override({
+      'init_checkpoint': init_checkpoint,
+      'model': {
+          'encoder': encoder_overrides,
+      },
+      'hub_module_url': hub_module_url,
+      'train_data': train_data_overrides,
+      'validation_data': validation_data_overrides,
+  })
+
+
+def override_tagging_task_config(task_cfg: tagging.TaggingConfig,
+                                 model_config_file: str, init_checkpoint: str,
+                                 hub_module_url: str, global_batch_size: int,
+                                 train_input_path: str,
+                                 validation_input_path: str, seq_length: int,
+                                 class_names: List[str]):
+  """Overrides a `TaggingConfig` object.
+
+  Sets the checkpoint, encoder, hub module, train/validation data and
+  `class_names` entries of `task_cfg` through `task_cfg.override`.
+  """
+  encoder_overrides = load_model_config_file(model_config_file)
+  # Training drops the last partial batch; validation keeps it.
+  train_data_overrides = {
+      'drop_remainder': True,
+      'global_batch_size': global_batch_size,
+      'input_path': train_input_path,
+      'is_training': True,
+      'seq_length': seq_length,
+  }
+  validation_data_overrides = {
+      'drop_remainder': False,
+      'global_batch_size': global_batch_size,
+      'input_path': validation_input_path,
+      'is_training': False,
+      'seq_length': seq_length,
+  }
+  task_cfg.override({
+      'init_checkpoint': init_checkpoint,
+      'model': {
+          'encoder': encoder_overrides,
+      },
+      'hub_module_url': hub_module_url,
+      'train_data': train_data_overrides,
+      'validation_data': validation_data_overrides,
+      'class_names': class_names,
+  })
+
+
+def write_glue_classification(task,
+                              model,
+                              input_file,
+                              output_file,
+                              predict_batch_size,
+                              seq_length,
+                              class_names,
+                              label_type='int',
+                              min_float_value=None,
+                              max_float_value=None):
+  """Makes classification predictions for glue and writes to output file.
+
+  The output is a tab-separated file with an `index\tprediction` header
+  followed by one row per prediction.
+
+  Args:
+    task: `Task` instance.
+    model: `keras.Model` instance.
+    input_file: Input test data file path.
+    output_file: Output test data file path.
+    predict_batch_size: Batch size for prediction.
+    seq_length: Input sequence length.
+    class_names: List of string class names, indexed by the predicted value.
+      Only used when `label_type` is 'int'.
+    label_type: String denoting label type ('int', 'float'), defaults to 'int'.
+    min_float_value: If set, predictions will be min-clipped to this value (only
+      for regression when `label_type` is set to 'float'). Defaults to `None`
+      (no clipping).
+    max_float_value: If set, predictions will be max-clipped to this value (only
+      for regression when `label_type` is set to 'float'). Defaults to `None`
+      (no clipping).
+
+  Raises:
+    ValueError: If `label_type` is neither 'int' nor 'float'.
+  """
+  if label_type not in ('int', 'float'):
+    raise ValueError('Unsupported `label_type`. Given: %s, expected `int` or '
+                     '`float`.' % label_type)
+  is_regression = label_type == 'float'
+
+  data_config = sentence_prediction_dataloader.SentencePredictionDataConfig(
+      input_path=input_file,
+      global_batch_size=predict_batch_size,
+      is_training=False,
+      seq_length=seq_length,
+      label_type=label_type,
+      drop_remainder=False,
+      include_example_id=True)
+  predictions = sentence_prediction.predict(task, data_config, model)
+
+  if is_regression:
+    # An unset bound falls back to the largest finite float, i.e. no clipping.
+    lower_bound = min_float_value
+    if lower_bound is None:
+      lower_bound = -sys.float_info.max
+    upper_bound = max_float_value
+    if upper_bound is None:
+      upper_bound = sys.float_info.max
+
+    # Clip predictions to range [lower_bound, upper_bound].
+    predictions = [
+        min(max(value, lower_bound), upper_bound) for value in predictions
+    ]
+
+  with tf.io.gfile.GFile(output_file, 'w') as writer:
+    writer.write('index\tprediction\n')
+    if is_regression:
+      # Regression: write the value with three decimals.
+      for index, value in enumerate(predictions):
+        writer.write('%d\t%.3f\n' % (index, value))
+    else:
+      # Classification: map the predicted value to its class name.
+      for index, class_id in enumerate(predictions):
+        writer.write('%d\t%s\n' % (index, class_names[class_id]))
+
+
+def write_xtreme_classification(task,
+                                model,
+                                input_file,
+                                output_file,
+                                predict_batch_size,
+                                seq_length,
+                                class_names,
+                                translated_input_file=None,
+                                test_time_aug_wgt=0.3):
+  """Makes classification predictions for xtreme and writes to output file.
+
+  Args:
+    task: `Task` instance.
+    model: `keras.Model` instance.
+    input_file: Input test data file path.
+    output_file: Output file path; one class name is written per line.
+    predict_batch_size: Batch size for prediction.
+    seq_length: Input sequence length.
+    class_names: List of string class names, indexed by the predicted value.
+    translated_input_file: Optional second input file. When given, a data
+      config for it is passed to `sentence_prediction.predict` as well.
+    test_time_aug_wgt: Passed through to `sentence_prediction.predict`.
+  """
+  # Both data configs share every setting except the input path.
+  shared_kwargs = dict(
+      seq_length=seq_length,
+      is_training=False,
+      label_type='int',
+      global_batch_size=predict_batch_size,
+      drop_remainder=False,
+      include_example_id=True)
+  config_cls = sentence_prediction_dataloader.SentencePredictionDataConfig
+
+  data_config = config_cls(input_path=input_file, **shared_kwargs)
+  data_config_aug = None
+  if translated_input_file is not None:
+    data_config_aug = config_cls(
+        input_path=translated_input_file, **shared_kwargs)
+
+  predictions = sentence_prediction.predict(task, data_config, model,
+                                            data_config_aug, test_time_aug_wgt)
+  with tf.io.gfile.GFile(output_file, 'w') as writer:
+    for class_id in predictions:
+      writer.write('%s\n' % class_names[class_id])
+
+
+def write_question_answering(task,
+                             model,
+                             input_file,
+                             output_file,
+                             predict_batch_size,
+                             seq_length,
+                             tokenization,
+                             vocab_file,
+                             do_lower_case,
+                             version_2_with_negative=False):
+  """Makes question answering predictions and writes to output file.
+
+  Only the first of the three values returned by `question_answering.predict`
+  is written, serialized as indented JSON followed by a newline. `doc_stride`
+  and `query_length` are fixed to 128 and 64.
+  """
+  qa_config_kwargs = dict(
+      do_lower_case=do_lower_case,
+      doc_stride=128,
+      drop_remainder=False,
+      global_batch_size=predict_batch_size,
+      input_path=input_file,
+      is_training=False,
+      query_length=64,
+      seq_length=seq_length,
+      tokenization=tokenization,
+      version_2_with_negative=version_2_with_negative,
+      vocab_file=vocab_file)
+  data_config = question_answering_dataloader.QADataConfig(**qa_config_kwargs)
+
+  all_predictions, _, _ = question_answering.predict(task, data_config, model)
+  serialized_predictions = json.dumps(all_predictions, indent=4)
+  with tf.io.gfile.GFile(output_file, 'w') as writer:
+    writer.write(serialized_predictions + '\n')
+
+
+def write_tagging(task, model, input_file, output_file, predict_batch_size,
+                  seq_length):
+  """Makes tagging predictions and writes to output file.
+
+  One label is written per line. Results that share a sentence id are written
+  back to back, and a blank line separates consecutive sentences.
+  """
+  data_config = tagging_dataloader.TaggingDataConfig(
+      input_path=input_file,
+      is_training=False,
+      seq_length=seq_length,
+      global_batch_size=predict_batch_size,
+      drop_remainder=False,
+      include_sentence_id=True)
+  results = tagging.predict(task, data_config, model)
+  class_names = task.task_config.class_names
+  # -1 marks that no sentence has been written yet.
+  previous_sentence_id = -1
+
+  with tf.io.gfile.GFile(output_file, 'w') as writer:
+    for sentence_id, _, predict_ids in results:
+      token_labels = [class_names[label_id] for label_id in predict_ids]
+      # Sentence ids must either repeat or increase by exactly one.
+      assert sentence_id in (previous_sentence_id, previous_sentence_id + 1)
+
+      starts_new_sentence = sentence_id != previous_sentence_id
+      if starts_new_sentence and previous_sentence_id != -1:
+        writer.write('\n')
+
+      writer.write('\n'.join(token_labels) + '\n')
+      previous_sentence_id = sentence_id
--- a/official/nlp/finetuning/glue/flags.py
+++ b/official/nlp/finetuning/glue/flags.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Common flags for GLUE finetuning binary."""
+from typing import Callable
+
+from absl import flags
+from absl import logging
+
+
+def define_flags():
+  """Defines the absl flags shared by the GLUE finetuning binary.
+
+  The flags fall into two groups: flags that select the run mode, task, data
+  paths, model source and config files, and flags for the tuning
+  hyperparameters.
+  """
+
+  # ===========================================================================
+  # Glue binary flags.
+  # ===========================================================================
+  flags.DEFINE_enum(
+      'mode', 'train_eval_and_predict',
+      ['train_eval_and_predict', 'train_eval', 'predict'],
+      'The mode to run the binary. If `train_eval_and_predict` '
+      'it will (1) train on the training data and (2) evaluate on '
+      'the validation data and (3) finally generate predictions '
+      'on the prediction data; if `train_eval`, it will only '
+      'run training and evaluation; if `predict`, it will only '
+      'run prediction using the model in `model_dir`.')
+
+  # No default: `task_name` is None unless it is given on the command line.
+  flags.DEFINE_enum('task_name', None, [
+      'AX', 'COLA', 'MNLI', 'MRPC', 'QNLI', 'QQP', 'RTE', 'SST-2', 'STS-B',
+      'WNLI'
+  ], 'The type of GLUE task.')
+
+  # Data paths.
+  flags.DEFINE_string('train_input_path', None,
+                      'The file path to the training data.')
+
+  flags.DEFINE_string('validation_input_path', None,
+                      'The file path to the evaluation data.')
+
+  flags.DEFINE_string('test_input_path', None,
+                      'The file path to the test input data.')
+
+  flags.DEFINE_string('test_output_path', None,
+                      'The file path to the test output data.')
+
+  flags.DEFINE_string('model_dir', '', 'The model directory containing '
+                      'subdirectories for each task. Only needed for "predict" '
+                      'mode. For all other modes, if not provided, a unique '
+                      'directory will be created automatically for each run.')
+
+  flags.DEFINE_string(
+      'input_meta_data_path', None, 'Path to file that contains '
+      'metadata about input file. It is output by the `create_finetuning_data` '
+      'binary. Required for all modes except "predict".')
+
+  # Model source: either a checkpoint plus model config file, or a TF-Hub url.
+  flags.DEFINE_string('init_checkpoint', '',
+                      'Initial checkpoint from a pre-trained BERT model.')
+
+  flags.DEFINE_string(
+      'model_config_file', '', 'The config file specifying the architecture '
+      'of the pre-trained model. The file can be either a bert_config.json '
+      'file or `encoders.EncoderConfig` in yaml file.')
+
+  flags.DEFINE_string(
+      'hub_module_url', '', 'TF-Hub path/url to a pretrained model. If '
+      'specified, `init_checkpoint` and `model_config_file` flag should not be '
+      'used.')
+
+  # Config files and bindings; each of these flags may be repeated.
+  flags.DEFINE_multi_string('gin_file', None,
+                            'List of paths to the gin config files.')
+
+  flags.DEFINE_multi_string('gin_params', None,
+                            'Newline separated list of gin parameter bindings.')
+
+  flags.DEFINE_multi_string(
+      'config_file', None, 'This is the advanced usage to specify the '
+      '`ExperimentConfig` directly. When specified, '
+      'we will ignore FLAGS related to `ExperimentConfig` such as '
+      '`train_input_path`, `validation_input_path` and following hparams.')
+
+  # ===========================================================================
+  # Tuning hparams.
+  # ===========================================================================
+  flags.DEFINE_integer('global_batch_size', 32,
+                       'Global batch size for train/eval/predict.')
+
+  flags.DEFINE_float('learning_rate', 3e-5, 'Initial learning rate.')
+
+  flags.DEFINE_integer('num_epoch', 3, 'Number of training epochs.')
+
+  flags.DEFINE_float('warmup_ratio', 0.1,
+                     'Proportion of learning rate warmup steps.')
+
+  flags.DEFINE_integer('num_eval_per_epoch', 2,
+                       'Number of evaluations to run per epoch.')
+
+
+def validate_flags(flags_obj: flags.FlagValues,
+                   file_exists_fn: Callable[[str], bool]):
+  """Raises ValueError if any flags are misconfigured.
+
+  Args:
+    flags_obj: A `flags.FlagValues` object, usually from `flags.FLAGS`.
+    file_exists_fn: A callable to decide if a file path exists or not.
+
+  Raises:
+    ValueError: If a flag required by the selected mode is not provided, if a
+      provided input path does not exist according to `file_exists_fn`, or if
+      the model source flags (`hub_module_url`, `init_checkpoint`,
+      `model_config_file`) are combined inconsistently.
+  """
+
+  def _require_flag(flag_value, flag_name):
+    """Fails when a flag that the current mode needs is unset or empty."""
+    if not flag_value:
+      raise ValueError('Flag `%s` must be provided in mode %s.' %
+                       (flag_name, flags_obj.mode))
+
+  def _check_path_exists(flag_path, flag_name):
+    """Fails when `flag_path` does not exist."""
+    if not file_exists_fn(flag_path):
+      raise ValueError('Flag `%s` at %s does not exist.' %
+                       (flag_name, flag_path))
+
+  def _validate_path(flag_path, flag_name):
+    """Fails when `flag_path` is unset or does not exist."""
+    _require_flag(flag_path, flag_name)
+    _check_path_exists(flag_path, flag_name)
+
+  # Modes are matched by substring so that `train_eval_and_predict` triggers
+  # the train, eval and predict checks alike.
+  if 'train' in flags_obj.mode:
+    _validate_path(flags_obj.train_input_path, 'train_input_path')
+    _validate_path(flags_obj.input_meta_data_path, 'input_meta_data_path')
+
+    # The multi-string flags are optional, but every listed file must exist.
+    for gin_file in flags_obj.gin_file or ():
+      _check_path_exists(gin_file, 'gin_file')
+    for config_file in flags_obj.config_file or ():
+      _check_path_exists(config_file, 'config_file')
+
+  if 'eval' in flags_obj.mode:
+    _validate_path(flags_obj.validation_input_path, 'validation_input_path')
+
+  if flags_obj.mode == 'predict':
+    # model_dir is only needed strictly in 'predict' mode.
+    _validate_path(flags_obj.model_dir, 'model_dir')
+
+  if 'predict' in flags_obj.mode:
+    _validate_path(flags_obj.test_input_path, 'test_input_path')
+    # The output path is a destination, so it only has to be provided; it is
+    # not required to exist yet. Checking it here surfaces a missing flag
+    # before any training or prediction work is done.
+    _require_flag(flags_obj.test_output_path, 'test_output_path')
+
+  if not flags_obj.config_file and flags_obj.mode != 'predict':
+    if flags_obj.hub_module_url:
+      if flags_obj.init_checkpoint or flags_obj.model_config_file:
+        raise ValueError(
+            'When `hub_module_url` is specified, `init_checkpoint` and '
+            '`model_config_file` should be empty.')
+      logging.info(
+          'Using the pretrained tf.hub from %s', flags_obj.hub_module_url)
+    else:
+      if not (flags_obj.init_checkpoint and flags_obj.model_config_file):
+        raise ValueError('Both `init_checkpoint` and `model_config_file` '
+                         'should be specified if `config_file` is not '
+                         'specified.')
+      _validate_path(flags_obj.model_config_file, 'model_config_file')
+      logging.info(
+          'Using the pretrained checkpoint from %s and model_config_file from '
+          '%s.', flags_obj.init_checkpoint, flags_obj.model_config_file)
--- a/official/nlp/finetuning/glue/run_glue.py
+++ b/official/nlp/finetuning/glue/run_glue.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Runs prediction to generate submission files for GLUE tasks."""
+import functools
+import json
+import os
+import pprint
+
+from absl import app
+from absl import flags
+from absl import logging
+
+import gin
+import tensorflow as tf
+
+from official.common import distribute_utils
+# Imports registered experiment configs.
+from official.common import registry_imports  # pylint: disable=unused-import
+from official.core import exp_factory
+from official.core import task_factory
+from official.core import train_lib
+from official.core import train_utils
+from official.modeling.hyperparams import params_dict
+from official.nlp.finetuning import binary_helper
+from official.nlp.finetuning.glue import flags as glue_flags
+
+
+# Device configs.
+flags.DEFINE_string('distribution_strategy', 'tpu',
+                    'The Distribution Strategy to use for training.')
+flags.DEFINE_string(
+    'tpu', '',
+    'The Cloud TPU to use for training. This should be either the name '
+    'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.')
+flags.DEFINE_integer('num_gpus', 1, 'The number of GPUs to use at each worker.')
+
+FLAGS = flags.FLAGS
+
+# Name of the registered experiment config that this binary starts from.
+EXPERIMENT_TYPE = 'bert/sentence_prediction'
+# Subdirectory of `model_dir` used for the best checkpoint.
+BEST_CHECKPOINT_EXPORT_SUBDIR = 'best_ckpt'
+
+# Evaluation metric used to pick the best checkpoint, keyed by GLUE task name.
+EVAL_METRIC_MAP = {
+    'AX': 'matthews_corrcoef',
+    'COLA': 'matthews_corrcoef',
+    'MNLI': 'cls_accuracy',
+    'MRPC': 'cls_accuracy',
+    'QNLI': 'cls_accuracy',
+    'QQP': 'cls_accuracy',
+    'RTE': 'cls_accuracy',
+    'SST-2': 'cls_accuracy',
+    'STS-B': 'pearson_spearman_corr',
+    'WNLI': 'cls_accuracy',
+}
+
+# Class names per task, passed as `class_names` when writing predictions.
+# STS-B has no entry because it is handled as regression.
+AX_CLASS_NAMES = ['contradiction', 'entailment', 'neutral']
+COLA_CLASS_NAMES = ['0', '1']
+MNLI_CLASS_NAMES = ['contradiction', 'entailment', 'neutral']
+MRPC_CLASS_NAMES = ['0', '1']
+QNLI_CLASS_NAMES = ['entailment', 'not_entailment']
+QQP_CLASS_NAMES = ['0', '1']
+RTE_CLASS_NAMES = ['entailment', 'not_entailment']
+SST_2_CLASS_NAMES = ['0', '1']
+WNLI_CLASS_NAMES = ['0', '1']
+
+
+def _override_exp_config_by_file(exp_config, exp_config_files):
+  """Overrides an `ExperimentConfig` object by files.
+
+  The files are applied strictly and in the given order.
+
+  Args:
+    exp_config: The experiment config to override.
+    exp_config_files: Paths of the config files to apply.
+
+  Returns:
+    The `exp_config` that was passed in.
+
+  Raises:
+    ValueError: If one of the files does not exist.
+  """
+  for config_path in exp_config_files:
+    path_exists = tf.io.gfile.exists(config_path)
+    if not path_exists:
+      raise ValueError('%s does not exist.' % config_path)
+    params_dict.override_params_dict(exp_config, config_path, is_strict=True)
+
+  return exp_config
+
+
+def _override_exp_config_by_flags(exp_config, input_meta_data):
+  """Overrides an `ExperimentConfig` object by flags.
+
+  Args:
+    exp_config: The experiment config whose `trainer` and `task` are
+      overridden.
+    input_meta_data: Dict read for `num_labels`, `train_data_size`,
+      `eval_data_size` and `max_seq_length`.
+
+  Returns:
+    The `exp_config` that was passed in.
+
+  Raises:
+    ValueError: If `FLAGS.task_name` is not a supported task.
+  """
+  task_name = FLAGS.task_name
+  # Pick the task-specific arguments of the sentence prediction override.
+  if task_name in ('AX', 'COLA'):
+    task_specific_kwargs = dict(
+        num_classes=input_meta_data['num_labels'],
+        metric_type='matthews_corrcoef')
+  elif task_name in ('MNLI', 'MRPC', 'QNLI', 'QQP', 'RTE', 'SST-2', 'WNLI'):
+    task_specific_kwargs = dict(num_classes=input_meta_data['num_labels'])
+  elif task_name in ('STS-B',):
+    # STS-B is configured with a single float-valued output.
+    task_specific_kwargs = dict(
+        num_classes=1,
+        metric_type='pearson_spearman_corr',
+        label_type='float')
+  else:
+    raise ValueError('Task %s not supported.' % task_name)
+
+  override_task_cfg_fn = functools.partial(
+      binary_helper.override_sentence_prediction_task_config,
+      **task_specific_kwargs)
+
+  trainer_kwargs = dict(
+      learning_rate=FLAGS.learning_rate,
+      num_epoch=FLAGS.num_epoch,
+      global_batch_size=FLAGS.global_batch_size,
+      warmup_ratio=FLAGS.warmup_ratio,
+      training_data_size=input_meta_data['train_data_size'],
+      eval_data_size=input_meta_data['eval_data_size'],
+      num_eval_per_epoch=FLAGS.num_eval_per_epoch,
+      best_checkpoint_export_subdir=BEST_CHECKPOINT_EXPORT_SUBDIR,
+      best_checkpoint_eval_metric=EVAL_METRIC_MAP[task_name],
+      best_checkpoint_metric_comp='higher')
+  binary_helper.override_trainer_cfg(exp_config.trainer, **trainer_kwargs)
+
+  override_task_cfg_fn(
+      exp_config.task,
+      model_config_file=FLAGS.model_config_file,
+      init_checkpoint=FLAGS.init_checkpoint,
+      hub_module_url=FLAGS.hub_module_url,
+      global_batch_size=FLAGS.global_batch_size,
+      train_input_path=FLAGS.train_input_path,
+      validation_input_path=FLAGS.validation_input_path,
+      seq_length=input_meta_data['max_seq_length'])
+  return exp_config
+
+
+def _get_exp_config(input_meta_data, exp_config_files):
+  """Gets an `ExperimentConfig` object.
+
+  Starts from the config registered under `EXPERIMENT_TYPE` and overrides it
+  either from `exp_config_files` (when given) or from flags. The result is
+  validated, locked and logged before it is returned.
+  """
+  base_config = exp_factory.get_exp_config(EXPERIMENT_TYPE)
+
+  if not exp_config_files:
+    logging.info('Loading `ExperimentConfig` from flags.')
+    exp_config = _override_exp_config_by_flags(base_config, input_meta_data)
+  else:
+    logging.info(
+        'Loading `ExperimentConfig` from file, and flags will be ignored.')
+    exp_config = _override_exp_config_by_file(base_config, exp_config_files)
+
+  exp_config.validate()
+  exp_config.lock()
+
+  formatted_params = pprint.pformat(exp_config.as_dict())
+  logging.info('Final experiment parameters: %s', formatted_params)
+
+  return exp_config
+
+
+def _write_submission_file(task, seq_length):
+  """Writes submission files that can be uploaded to the leaderboard.
+
+  Builds a model from `task`, restores the latest checkpoint found under the
+  best-checkpoint subdirectory of `FLAGS.model_dir`, and writes predictions
+  for `FLAGS.test_input_path` to `FLAGS.test_output_path`.
+
+  Args:
+    task: Task instance used to build the model and run prediction.
+    seq_length: Input sequence length.
+
+  Raises:
+    ValueError: If no checkpoint is found in the best-checkpoint directory.
+    KeyError: If `FLAGS.task_name` has no registered class names.
+  """
+  # A bare file name has an empty dirname; there is no directory to create.
+  output_dir = os.path.dirname(FLAGS.test_output_path)
+  if output_dir:
+    tf.io.gfile.makedirs(output_dir)
+  model = task.build_model()
+
+  ckpt_dir = os.path.join(FLAGS.model_dir, BEST_CHECKPOINT_EXPORT_SUBDIR)
+  ckpt_file = tf.train.latest_checkpoint(ckpt_dir)
+  if ckpt_file is None:
+    # `tf.train.latest_checkpoint` returns None when it finds no checkpoint.
+    # Fail here rather than predict with weights that were never restored.
+    raise ValueError('No checkpoint found in %s.' % ckpt_dir)
+  logging.info('Restoring checkpoints from %s', ckpt_file)
+  checkpoint = tf.train.Checkpoint(model=model)
+  checkpoint.read(ckpt_file).expect_partial()
+
+  class_names_map = {
+      'AX': AX_CLASS_NAMES,
+      'COLA': COLA_CLASS_NAMES,
+      'MNLI': MNLI_CLASS_NAMES,
+      'MRPC': MRPC_CLASS_NAMES,
+      'QNLI': QNLI_CLASS_NAMES,
+      'QQP': QQP_CLASS_NAMES,
+      'RTE': RTE_CLASS_NAMES,
+      'SST-2': SST_2_CLASS_NAMES,
+      'WNLI': WNLI_CLASS_NAMES,
+  }
+  logging.info('Predicting %s', FLAGS.test_input_path)
+  if FLAGS.task_name == 'STS-B':
+    # No class_names (regression), clip predictions to [0.0, 5.0] per glue
+    # benchmark grader.
+    task_kwargs = dict(
+        class_names=None,
+        label_type='float',
+        min_float_value=0.0,
+        max_float_value=5.0)
+  else:
+    task_kwargs = dict(class_names=class_names_map[FLAGS.task_name])
+
+  binary_helper.write_glue_classification(
+      task=task,
+      model=model,
+      input_file=FLAGS.test_input_path,
+      output_file=FLAGS.test_output_path,
+      # Prediction reuses the batch size configured for the training data.
+      predict_batch_size=task.task_config.train_data.global_batch_size,
+      seq_length=seq_length,
+      **task_kwargs)
+
+
+def main(argv):
+  """Runs training/eval and/or prediction, depending on `FLAGS.mode`.
+
+  Args:
+    argv: Positional command-line arguments; more than one element is
+      rejected.
+
+  Raises:
+    app.UsageError: If `argv` has more than one element.
+  """
+  if len(argv) > 1:
+    raise app.UsageError('Too many command-line arguments.')
+
+  glue_flags.validate_flags(FLAGS, file_exists_fn=tf.io.gfile.exists)
+
+  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
+  distribution_strategy = distribute_utils.get_distribution_strategy(
+      distribution_strategy=FLAGS.distribution_strategy,
+      num_gpus=FLAGS.num_gpus,
+      tpu_address=FLAGS.tpu)
+
+  # The metadata file is read as bytes and decoded as UTF-8 JSON.
+  with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
+    input_meta_data = json.loads(reader.read().decode('utf-8'))
+
+  with distribution_strategy.scope():
+    # `task` is shared between the two phases so that a combined run predicts
+    # with the task object that was just trained.
+    task = None
+    # NOTE(review): `in` on FLAGS.mode is presumably a substring match so a
+    # combined mode value triggers both branches - confirm against the flag
+    # definition in `glue_flags`.
+    if 'train_eval' in FLAGS.mode:
+      logging.info('Starting training and eval...')
+      logging.info('Model dir: %s', FLAGS.model_dir)
+
+      exp_config = _get_exp_config(
+          input_meta_data=input_meta_data,
+          exp_config_files=FLAGS.config_file)
+      train_utils.serialize_config(exp_config, FLAGS.model_dir)
+      task = task_factory.get_task(exp_config.task, logging_dir=FLAGS.model_dir)
+      train_lib.run_experiment(
+          distribution_strategy=distribution_strategy,
+          task=task,
+          mode='train_and_eval',
+          params=exp_config,
+          model_dir=FLAGS.model_dir)
+
+    if 'predict' in FLAGS.mode:
+      logging.info('Starting predict...')
+      # When mode is `predict`, `task` will be None.
+      if task is None:
+        # Rebuild the config from `params.yaml` in the model dir - presumably
+        # the file written by `train_utils.serialize_config` during an earlier
+        # training run; verify.
+        exp_config = _get_exp_config(
+            input_meta_data=input_meta_data,
+            exp_config_files=[os.path.join(FLAGS.model_dir, 'params.yaml')])
+        task = task_factory.get_task(
+            exp_config.task, logging_dir=FLAGS.model_dir)
+      _write_submission_file(task, input_meta_data['max_seq_length'])
+
+
+if __name__ == '__main__':
+  # Flags are defined here, before `app.run` hands control to `main`.
+  glue_flags.define_flags()
+  # `mode` and `task_name` have no usable default: `main` branches on the
+  # former and `_write_submission_file` indexes its writer map by the latter.
+  flags.mark_flag_as_required('mode')
+  flags.mark_flag_as_required('task_name')
+  app.run(main)
--- a/official/nlp/keras_nlp/__init__.py
+++ b/official/nlp/keras_nlp/__init__.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Keras-NLP package definition."""
 # pylint: disable=wildcard-import
 from official.nlp.keras_nlp import encoders

--- a/official/nlp/keras_nlp/encoders/__init__.py
+++ b/official/nlp/keras_nlp/encoders/__init__.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,6 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Keras-NLP layers package definition."""
 from official.nlp.keras_nlp.encoders.bert_encoder import BertEncoder
--- a/official/nlp/keras_nlp/encoders/bert_encoder.py
+++ b/official/nlp/keras_nlp/encoders/bert_encoder.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Bert encoder network."""
 # pylint: disable=g-classes-have-attributes

@@ -60,7 +60,7 @@ class BertEncoder(tf.keras.Model):
    initializer: The initialzer to use for all weights in this encoder.
    output_range: The sequence output range, [0, output_range), by slicing the
      target sequence of the last transformer layer. `None` means the entire
-      target sequence will attend to the source sequence, which yeilds the full
+      target sequence will attend to the source sequence, which yields the full
      output.
    embedding_width: The width of the word embeddings. If the embedding width is
      not equal to hidden size, embedding parameters will be factorized into two

--- a/official/nlp/keras_nlp/encoders/bert_encoder_test.py
+++ b/official/nlp/keras_nlp/encoders/bert_encoder_test.py
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Tests for transformer-based bert encoder network."""

 from absl.testing import parameterized

--- a/official/nlp/keras_nlp/layers/__init__.py
+++ b/official/nlp/keras_nlp/layers/__init__.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Keras-NLP layers package definition."""
 from official.nlp.keras_nlp.layers.masked_lm import MaskedLM
 from official.nlp.keras_nlp.layers.on_device_embedding import OnDeviceEmbedding

--- a/official/nlp/keras_nlp/layers/masked_lm.py
+++ b/official/nlp/keras_nlp/layers/masked_lm.py
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Masked language model network."""
 # pylint: disable=g-classes-have-attributes
 import tensorflow as tf
@@ -94,12 +94,12 @@ class MaskedLM(tf.keras.layers.Layer):
                              'it has variable sharing logic.')

  def _gather_indexes(self, sequence_tensor, positions):
-    """Gathers the vectors at the specific positions.
+    """Gathers the vectors at the specific positions, for performance.

    Args:
-        sequence_tensor: Sequence output of `BertModel` layer of shape
+        sequence_tensor: Sequence output of shape
          (`batch_size`, `seq_length`, num_hidden) where num_hidden is number of
-          hidden units of `BertModel` layer.
+          hidden units.
        positions: Positions ids of tokens in sequence to mask for pretraining
          of with dimension (batch_size, num_predictions) where
          `num_predictions` is maximum number of tokens to mask out and predict

--- a/official/nlp/keras_nlp/layers/on_device_embedding.py
+++ b/official/nlp/keras_nlp/layers/on_device_embedding.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Keras-based one-hot embedding layer."""
 # pylint: disable=g-classes-have-attributes


--- a/official/nlp/keras_nlp/layers/on_device_embedding_test.py
+++ b/official/nlp/keras_nlp/layers/on_device_embedding_test.py
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Tests for Keras-based one-hot embedding layer."""

 import numpy as np

--- a/official/nlp/keras_nlp/layers/position_embedding.py
+++ b/official/nlp/keras_nlp/layers/position_embedding.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Keras-based positional embedding layer."""
 # pylint: disable=g-classes-have-attributes
 import tensorflow as tf

--- a/official/nlp/keras_nlp/layers/position_embedding_test.py
+++ b/official/nlp/keras_nlp/layers/position_embedding_test.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Tests for Keras-based positional embedding layer."""

 import numpy as np

--- a/official/nlp/keras_nlp/layers/self_attention_mask.py
+++ b/official/nlp/keras_nlp/layers/self_attention_mask.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Keras layer that creates a self-attention mask."""

 import tensorflow as tf

--- a/official/nlp/keras_nlp/layers/transformer_encoder_block.py
+++ b/official/nlp/keras_nlp/layers/transformer_encoder_block.py
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Keras-based TransformerEncoder block layer."""

 import tensorflow as tf

--- a/official/nlp/keras_nlp/layers/transformer_encoder_block_test.py
+++ b/official/nlp/keras_nlp/layers/transformer_encoder_block_test.py
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
+
 """Tests for Keras-based transformer block layer."""

 from absl.testing import parameterized