Merge pull request #10009 from supersteph:run_superglue

PiperOrigin-RevId: 375508114

Merge pull request #10009 from supersteph:run_superglue
PiperOrigin-RevId: 375508114
589fe5d1 · A. Unique TensorFlower · 2ad3e213 · 93cdbaf5 · 589fe5d1 · 589fe5d1
Commit 589fe5d1 authored May 24, 2021 by A. Unique TensorFlower
3 changed files
--- a/official/nlp/finetuning/binary_helper.py
+++ b/official/nlp/finetuning/binary_helper.py
@@ -310,6 +310,48 @@ def write_glue_classification(task,
        writer.write('%d\t%s\n' % (index, class_names[prediction]))


+def write_superglue_classification(task,
+                                   model,
+                                   input_file,
+                                   output_file,
+                                   predict_batch_size,
+                                   seq_length,
+                                   class_names,
+                                   label_type='int'):
+  """Makes classification predictions for superglue and writes to output file.
+
+  One JSON object is written per prediction, one per line, for example
+  `{"idx": 0, "label": "entailment"}`. `idx` is the position of the prediction
+  in the sequence returned by `sentence_prediction.predict`.
+
+  Args:
+    task: `Task` instance.
+    model: `keras.Model` instance.
+    input_file: Input test data file path.
+    output_file: Output test data file path.
+    predict_batch_size: Batch size for prediction.
+    seq_length: Input sequence length.
+    class_names: List of string class names, indexed by the predicted class.
+    label_type: String denoting label type. Only 'int' (classification) is
+      supported; defaults to 'int'.
+
+  Raises:
+    ValueError: If `label_type` is anything other than 'int'.
+  """
+  # Imported locally so this function does not rely on a module-level import.
+  import json  # pylint: disable=g-import-not-at-top
+
+  # Compare for equality: a substring test such as `label_type in 'int'` would
+  # also let values like '' or 'i' through.
+  if label_type != 'int':
+    raise ValueError('Unsupported `label_type`. Given: %s, expected `int`.' %
+                     label_type)
+
+  data_config = sentence_prediction_dataloader.SentencePredictionDataConfig(
+      input_path=input_file,
+      global_batch_size=predict_batch_size,
+      is_training=False,
+      seq_length=seq_length,
+      label_type=label_type,
+      drop_remainder=False,
+      include_example_id=True)
+  predictions = sentence_prediction.predict(task, data_config, model)
+
+  with tf.io.gfile.GFile(output_file, 'w') as writer:
+    for index, prediction in enumerate(predictions):
+      # Classification. `json.dumps` quotes and escapes the label so that
+      # every emitted line is a valid JSON document.
+      record = {'idx': index, 'label': class_names[prediction]}
+      writer.write(json.dumps(record) + '\n')
+
+
 def write_xtreme_classification(task,
                                model,
                                input_file,

--- a/official/nlp/finetuning/superglue/flags.py
+++ b/official/nlp/finetuning/superglue/flags.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Common flags for SuperGLUE finetuning binary."""
+from typing import Callable
+
+from absl import flags
+from absl import logging
+
+
+def define_flags():
+  """Registers the command-line flags of the SuperGLUE finetuning binary.
+
+  Two groups are defined: binary flags (run mode, task name, data/model paths
+  and config overrides) and tuning hyperparameters.
+  """
+  run_modes = ['train_eval_and_predict', 'train_eval', 'predict']
+  task_names = [
+      'AX-b', 'CB', 'COPA', 'MULTIRC', 'RTE', 'WiC', 'WSC', 'BoolQ', 'ReCoRD',
+      'AX-g'
+  ]
+
+  # ===========================================================================
+  # SuperGlue binary flags.
+  # ===========================================================================
+  flags.DEFINE_enum(
+      name='mode',
+      default='train_eval_and_predict',
+      enum_values=run_modes,
+      help=('The mode to run the binary. If `train_eval_and_predict` '
+            'it will (1) train on the training data and (2) evaluate on '
+            'the validation data and (3) finally generate predictions '
+            'on the prediction data; if `train_eval`, it will only '
+            'run training and evaluation; if `predict`, it will only '
+            'run prediction using the model in `model_dir`.'))
+
+  flags.DEFINE_enum(
+      name='task_name',
+      default=None,
+      enum_values=task_names,
+      help='The type of SuperGLUE task.')
+
+  # Input / output data locations.
+  flags.DEFINE_string(
+      name='train_input_path',
+      default=None,
+      help='The file path to the training data.')
+
+  flags.DEFINE_string(
+      name='validation_input_path',
+      default=None,
+      help='The file path to the evaluation data.')
+
+  flags.DEFINE_string(
+      name='test_input_path',
+      default=None,
+      help='The file path to the test input data.')
+
+  flags.DEFINE_string(
+      name='test_output_path',
+      default=None,
+      help='The file path to the test output data.')
+
+  flags.DEFINE_string(
+      name='model_dir',
+      default='',
+      help=('The model directory containing '
+            'subdirectories for each task. Only needed for "predict" '
+            'mode. For all other modes, if not provided, a unique '
+            'directory will be created automatically for each run.'))
+
+  flags.DEFINE_string(
+      name='input_meta_data_path',
+      default=None,
+      help=('Path to file that contains '
+            'metadata about input file. It is output by the '
+            '`create_finetuning_data` '
+            'binary. Required for all modes except "predict".'))
+
+  # Pre-trained model sources.
+  flags.DEFINE_string(
+      name='init_checkpoint',
+      default='',
+      help='Initial checkpoint from a pre-trained BERT model.')
+
+  flags.DEFINE_string(
+      name='model_config_file',
+      default='',
+      help=('The config file specifying the architecture '
+            'of the pre-trained model. The file can be either a '
+            'bert_config.json '
+            'file or `encoders.EncoderConfig` in yaml file.'))
+
+  flags.DEFINE_string(
+      name='hub_module_url',
+      default='',
+      help=('TF-Hub path/url to a pretrained model. If '
+            'specified, `init_checkpoint` and `model_config_file` flag should '
+            'not be '
+            'used.'))
+
+  # Gin and `ExperimentConfig` overrides.
+  flags.DEFINE_multi_string(
+      name='gin_file',
+      default=None,
+      help='List of paths to the gin config files.')
+
+  flags.DEFINE_multi_string(
+      name='gin_params',
+      default=None,
+      help='Newline separated list of gin parameter bindings.')
+
+  flags.DEFINE_multi_string(
+      name='config_file',
+      default=None,
+      help=('This is the advanced usage to specify the '
+            '`ExperimentConfig` directly. When specified, '
+            'we will ignore FLAGS related to `ExperimentConfig` such as '
+            '`train_input_path`, `validation_input_path` and following '
+            'hparams.'))
+
+  # ===========================================================================
+  # Tuning hparams.
+  # ===========================================================================
+  flags.DEFINE_integer(
+      name='global_batch_size',
+      default=32,
+      help='Global batch size for train/eval/predict.')
+
+  flags.DEFINE_float(
+      name='learning_rate', default=3e-5, help='Initial learning rate.')
+
+  flags.DEFINE_integer(
+      name='num_epoch', default=3, help='Number of training epochs.')
+
+  flags.DEFINE_float(
+      name='warmup_ratio',
+      default=0.1,
+      help='Proportion of learning rate warmup steps.')
+
+  flags.DEFINE_integer(
+      name='num_eval_per_epoch',
+      default=2,
+      help='Number of evaluations to run per epoch.')
+
+
+def validate_flags(flags_obj: flags.FlagValues, file_exists_fn: Callable[[str],
+                                                                         bool]):
+  """Raises ValueError if any flags are misconfigured.
+
+  The checks depend on `flags_obj.mode`:
+    * modes containing 'train': `train_input_path` and `input_meta_data_path`
+      must be set and exist; any `gin_file` / `config_file` entries must exist.
+    * modes containing 'eval': `validation_input_path` must be set and exist.
+    * mode 'predict' exactly: `model_dir` must be set and exist.
+    * modes containing 'predict': `test_input_path` must be set and exist, and
+      `test_output_path` must be set.
+    * when no `config_file` is given and the mode is not 'predict': either
+      `hub_module_url` alone, or both `init_checkpoint` and
+      `model_config_file`, must be specified.
+
+  Args:
+    flags_obj: A `flags.FlagValues` object, usually `flags.FLAGS`.
+    file_exists_fn: A callable to decide if a file path exists or not.
+
+  Raises:
+    ValueError: If a required flag is missing, a referenced path does not
+      exist, or the pre-trained model flags are combined inconsistently.
+  """
+
+  def _check_path_exists(flag_path, flag_name):
+    if not file_exists_fn(flag_path):
+      raise ValueError('Flag `%s` at %s does not exist.' %
+                       (flag_name, flag_path))
+
+  def _require_flag(flag_value, flag_name):
+    if not flag_value:
+      raise ValueError('Flag `%s` must be provided in mode %s.' %
+                       (flag_name, flags_obj.mode))
+
+  def _validate_path(flag_path, flag_name):
+    _require_flag(flag_path, flag_name)
+    _check_path_exists(flag_path, flag_name)
+
+  # Mode names are 'train_eval_and_predict', 'train_eval' and 'predict', so
+  # the substring tests below select every mode that includes a given phase.
+  if 'train' in flags_obj.mode:
+    _validate_path(flags_obj.train_input_path, 'train_input_path')
+    _validate_path(flags_obj.input_meta_data_path, 'input_meta_data_path')
+
+    if flags_obj.gin_file:
+      for gin_file in flags_obj.gin_file:
+        _check_path_exists(gin_file, 'gin_file')
+    if flags_obj.config_file:
+      for config_file in flags_obj.config_file:
+        _check_path_exists(config_file, 'config_file')
+
+  if 'eval' in flags_obj.mode:
+    _validate_path(flags_obj.validation_input_path, 'validation_input_path')
+
+  if flags_obj.mode == 'predict':
+    # model_dir is only needed strictly in 'predict' mode.
+    _validate_path(flags_obj.model_dir, 'model_dir')
+
+  if 'predict' in flags_obj.mode:
+    _validate_path(flags_obj.test_input_path, 'test_input_path')
+    # The output file does not have to exist yet, but a destination must be
+    # given so that writing predictions does not fail on a missing path.
+    _require_flag(flags_obj.test_output_path, 'test_output_path')
+
+  if not flags_obj.config_file and flags_obj.mode != 'predict':
+    if flags_obj.hub_module_url:
+      if flags_obj.init_checkpoint or flags_obj.model_config_file:
+        raise ValueError(
+            'When `hub_module_url` is specified, `init_checkpoint` and '
+            '`model_config_file` should be empty.')
+      logging.info('Using the pretrained tf.hub from %s',
+                   flags_obj.hub_module_url)
+    else:
+      if not (flags_obj.init_checkpoint and flags_obj.model_config_file):
+        raise ValueError('Both `init_checkpoint` and `model_config_file` '
+                         'should be specified if `config_file` is not '
+                         'specified.')
+      _validate_path(flags_obj.model_config_file, 'model_config_file')
+      logging.info(
+          'Using the pretrained checkpoint from %s and model_config_file from '
+          '%s.', flags_obj.init_checkpoint, flags_obj.model_config_file)
--- a/official/nlp/finetuning/superglue/run_superglue.py
+++ b/official/nlp/finetuning/superglue/run_superglue.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Runs prediction to generate submission files for SuperGLUE tasks."""
+import functools
+import json
+import os
+import pprint
+
+from absl import app
+from absl import flags
+from absl import logging
+
+import gin
+import tensorflow as tf
+
+from official.common import distribute_utils
+# Imports registered experiment configs.
+from official.core import exp_factory
+from official.core import task_factory
+from official.core import train_lib
+from official.core import train_utils
+from official.modeling.hyperparams import params_dict
+from official.nlp.finetuning import binary_helper
+from official.nlp.finetuning.superglue import flags as superglue_flags
+
+# Device configs.
+flags.DEFINE_string(
+    name='distribution_strategy',
+    default='tpu',
+    help='The Distribution Strategy to use for training.')
+flags.DEFINE_string(
+    name='tpu',
+    default='',
+    help=('The Cloud TPU to use for training. This should be either the name '
+          'used when creating the Cloud TPU, or a '
+          'grpc://ip.address.of.tpu:8470 url.'))
+flags.DEFINE_integer(
+    name='num_gpus',
+    default=1,
+    help='The number of GPUs to use at each worker.')
+
+FLAGS = flags.FLAGS
+
+# Name of the registered experiment config used as the starting point.
+EXPERIMENT_TYPE = 'bert/sentence_prediction'
+# Sub-directory of `model_dir` used for the best-checkpoint export.
+BEST_CHECKPOINT_EXPORT_SUBDIR = 'best_ckpt'
+
+# Evaluation metric used to pick the best checkpoint, keyed by task name.
+EVAL_METRIC_MAP = {
+    'AX-b': 'matthews_corrcoef',
+    'CB': 'cls_accuracy',
+    'COPA': 'cls_accuracy',
+    'MULTIRC': 'exact_match',
+    'RTE': 'cls_accuracy',
+    'WiC': 'cls_accuracy',
+    'WSC': 'cls_accuracy',
+    'BoolQ': 'cls_accuracy',
+    'ReCoRD': 'cls_accuracy',
+    'AX-g': 'cls_accuracy',
+}
+
+# Class names written to the submission file, indexed by predicted class.
+AXG_CLASS_NAMES = ['entailment', 'not_entailment']
+RTE_CLASS_NAMES = ['entailment', 'not_entailment']
+
+
+def _override_exp_config_by_file(exp_config, exp_config_files):
+  """Applies each config file, in order, on top of an `ExperimentConfig`.
+
+  Args:
+    exp_config: The `ExperimentConfig` object to override.
+    exp_config_files: Iterable of config file paths.
+
+  Returns:
+    The `exp_config` object that was passed in.
+
+  Raises:
+    ValueError: If one of the files does not exist. Files listed before the
+      missing one have already been applied at that point.
+  """
+  for config_path in exp_config_files:
+    if tf.io.gfile.exists(config_path):
+      params_dict.override_params_dict(
+          exp_config, config_path, is_strict=True)
+      continue
+    raise ValueError('%s does not exist.' % config_path)
+
+  return exp_config
+
+
+def _override_exp_config_by_flags(exp_config, input_meta_data):
+  """Overrides an `ExperimentConfig` object by flags.
+
+  Args:
+    exp_config: The `ExperimentConfig` to update. Its `trainer` and `task`
+      sub-configs are handed to the `binary_helper` override functions.
+    input_meta_data: Mapping providing `num_labels`, `train_data_size`,
+      `eval_data_size` and `max_seq_length`.
+
+  Returns:
+    The `exp_config` object that was passed in.
+
+  Raises:
+    ValueError: If `FLAGS.task_name` is not one of the tasks handled here.
+  """
+  # Compare with `==`: `task_name in 'AX-b'` would be a substring test on the
+  # string 'AX-b' and would raise TypeError for an unset (None) task name.
+  if FLAGS.task_name == 'AX-b':
+    override_task_cfg_fn = functools.partial(
+        binary_helper.override_sentence_prediction_task_config,
+        num_classes=input_meta_data['num_labels'],
+        metric_type='matthews_corrcoef')
+  elif FLAGS.task_name in ('CB', 'COPA', 'RTE', 'WiC', 'WSC', 'BoolQ', 'ReCoRD',
+                           'AX-g'):
+    override_task_cfg_fn = functools.partial(
+        binary_helper.override_sentence_prediction_task_config,
+        num_classes=input_meta_data['num_labels'])
+  else:
+    raise ValueError('Task %s not supported.' % FLAGS.task_name)
+
+  # Trainer settings: schedule, batch size and best-checkpoint export.
+  binary_helper.override_trainer_cfg(
+      exp_config.trainer,
+      learning_rate=FLAGS.learning_rate,
+      num_epoch=FLAGS.num_epoch,
+      global_batch_size=FLAGS.global_batch_size,
+      warmup_ratio=FLAGS.warmup_ratio,
+      training_data_size=input_meta_data['train_data_size'],
+      eval_data_size=input_meta_data['eval_data_size'],
+      num_eval_per_epoch=FLAGS.num_eval_per_epoch,
+      best_checkpoint_export_subdir=BEST_CHECKPOINT_EXPORT_SUBDIR,
+      best_checkpoint_eval_metric=EVAL_METRIC_MAP[FLAGS.task_name],
+      best_checkpoint_metric_comp='higher')
+
+  # Task settings: pre-trained model source and input data.
+  override_task_cfg_fn(
+      exp_config.task,
+      model_config_file=FLAGS.model_config_file,
+      init_checkpoint=FLAGS.init_checkpoint,
+      hub_module_url=FLAGS.hub_module_url,
+      global_batch_size=FLAGS.global_batch_size,
+      train_input_path=FLAGS.train_input_path,
+      validation_input_path=FLAGS.validation_input_path,
+      seq_length=input_meta_data['max_seq_length'])
+  return exp_config
+
+
+def _get_exp_config(input_meta_data, exp_config_files):
+  """Builds, validates and locks the `ExperimentConfig` for this run.
+
+  Starts from the config registered under `EXPERIMENT_TYPE`. When
+  `exp_config_files` is non-empty the files are applied and flags are ignored;
+  otherwise the config is overridden from flags and `input_meta_data`.
+
+  Args:
+    input_meta_data: Metadata mapping forwarded to the flag-based override.
+    exp_config_files: Config file paths, or a falsy value to use flags.
+
+  Returns:
+    The validated and locked `ExperimentConfig`.
+  """
+  base_config = exp_factory.get_exp_config(EXPERIMENT_TYPE)
+
+  if not exp_config_files:
+    logging.info('Loading `ExperimentConfig` from flags.')
+    final_config = _override_exp_config_by_flags(base_config, input_meta_data)
+  else:
+    logging.info(
+        'Loading `ExperimentConfig` from file, and flags will be ignored.')
+    final_config = _override_exp_config_by_file(base_config, exp_config_files)
+
+  final_config.validate()
+  final_config.lock()
+
+  # Log the fully resolved parameters.
+  printer = pprint.PrettyPrinter()
+  params_text = printer.pformat(final_config.as_dict())
+  logging.info('Final experiment parameters: %s', params_text)
+
+  return final_config
+
+
+def _write_submission_file(task, seq_length):
+  """Writes submission files that can be uploaded to the leaderboard.
+
+  Builds a model from `task`, reads the latest checkpoint found under
+  `<model_dir>/<BEST_CHECKPOINT_EXPORT_SUBDIR>` into it, and writes predictions
+  for `FLAGS.test_input_path` to `FLAGS.test_output_path`.
+
+  Args:
+    task: Task object providing `build_model()` and `task_config`.
+    seq_length: Input sequence length forwarded to the prediction writer.
+
+  Raises:
+    ValueError: If no prediction writer is registered for `FLAGS.task_name`.
+  """
+  write_fn = binary_helper.write_superglue_classification
+  write_fn_map = {
+      'RTE': functools.partial(write_fn, class_names=RTE_CLASS_NAMES),
+      'AX-g': functools.partial(write_fn, class_names=AXG_CLASS_NAMES)
+  }
+  # Fail early with a readable message instead of a bare KeyError raised only
+  # after the model has been built and the checkpoint restored.
+  if FLAGS.task_name not in write_fn_map:
+    raise ValueError(
+        'Writing a submission file is not supported for task %s. Supported '
+        'tasks: %s.' % (FLAGS.task_name, ', '.join(sorted(write_fn_map))))
+
+  # A bare file name has an empty dirname; there is nothing to create then.
+  output_dir = os.path.dirname(FLAGS.test_output_path)
+  if output_dir:
+    tf.io.gfile.makedirs(output_dir)
+  model = task.build_model()
+
+  ckpt_dir = os.path.join(FLAGS.model_dir, BEST_CHECKPOINT_EXPORT_SUBDIR)
+  ckpt_file = tf.train.latest_checkpoint(ckpt_dir)
+  if ckpt_file is None:
+    # `latest_checkpoint` returns None when the directory has no checkpoint.
+    logging.warning('No checkpoint found under %s.', ckpt_dir)
+  logging.info('Restoring checkpoints from %s', ckpt_file)
+  checkpoint = tf.train.Checkpoint(model=model)
+  checkpoint.read(ckpt_file).expect_partial()
+
+  logging.info('Predicting %s', FLAGS.test_input_path)
+  write_fn_map[FLAGS.task_name](
+      task=task,
+      model=model,
+      input_file=FLAGS.test_input_path,
+      output_file=FLAGS.test_output_path,
+      predict_batch_size=(task.task_config.train_data.global_batch_size),
+      seq_length=seq_length)
+
+
+def main(argv):
+  """Runs training/evaluation and/or prediction according to `FLAGS.mode`.
+
+  Args:
+    argv: Positional command-line arguments left after flag parsing; only the
+      program name is accepted.
+
+  Raises:
+    app.UsageError: If extra positional arguments are given.
+    ValueError: If flags are misconfigured or `input_meta_data_path` is unset.
+  """
+  if len(argv) > 1:
+    raise app.UsageError('Too many command-line arguments.')
+
+  superglue_flags.validate_flags(FLAGS, file_exists_fn=tf.io.gfile.exists)
+
+  # The metadata file is read below in every mode (prediction needs its
+  # `max_seq_length`), so report a missing flag clearly before any device setup.
+  if not FLAGS.input_meta_data_path:
+    raise ValueError('Flag `input_meta_data_path` must be provided in mode %s.' %
+                     FLAGS.mode)
+
+  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
+  distribution_strategy = distribute_utils.get_distribution_strategy(
+      distribution_strategy=FLAGS.distribution_strategy,
+      num_gpus=FLAGS.num_gpus,
+      tpu_address=FLAGS.tpu)
+
+  with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
+    input_meta_data = json.loads(reader.read().decode('utf-8'))
+
+  with distribution_strategy.scope():
+    task = None
+    # Matches both `train_eval` and `train_eval_and_predict`.
+    if 'train_eval' in FLAGS.mode:
+      logging.info('Starting training and eval...')
+      logging.info('Model dir: %s', FLAGS.model_dir)
+
+      exp_config = _get_exp_config(
+          input_meta_data=input_meta_data, exp_config_files=FLAGS.config_file)
+      train_utils.serialize_config(exp_config, FLAGS.model_dir)
+      task = task_factory.get_task(exp_config.task, logging_dir=FLAGS.model_dir)
+      train_lib.run_experiment(
+          distribution_strategy=distribution_strategy,
+          task=task,
+          mode='train_and_eval',
+          params=exp_config,
+          model_dir=FLAGS.model_dir)
+
+    # Matches both `predict` and `train_eval_and_predict`.
+    if 'predict' in FLAGS.mode:
+      logging.info('Starting predict...')
+      # When mode is `predict`, `task` will be None; rebuild it from the
+      # `params.yaml` file located in `model_dir`.
+      if task is None:
+        exp_config = _get_exp_config(
+            input_meta_data=input_meta_data,
+            exp_config_files=[os.path.join(FLAGS.model_dir, 'params.yaml')])
+        task = task_factory.get_task(
+            exp_config.task, logging_dir=FLAGS.model_dir)
+      _write_submission_file(task, input_meta_data['max_seq_length'])
+
+
+if __name__ == '__main__':
+  # Flags must be defined before they can be marked as required.
+  superglue_flags.define_flags()
+  for required_flag in ('mode', 'task_name'):
+    flags.mark_flag_as_required(required_flag)
+  app.run(main)