Unverified Commit 937a530a authored by Yanhui Liang's avatar Yanhui Liang Committed by GitHub
Browse files

Keras model benchmark (#4476)

* Add callbacks

* Add readme

* update readme

* fix some comments

* Address all comments

* Update docstrings

* Add method docstrings

* Update callbacks

* Add comments on global_step initialization

* Some updates

* Address comments
parent 7d0fcd09
# Keras Application Models Benchmark
## Overview
This provides a single scaffold to benchmark the Keras built-in application [models](https://keras.io/applications/). All the models are for image classification applications, and include:
- Xception
- VGG16
- VGG19
- ResNet50
- InceptionV3
- InceptionResNetV2
- MobileNet
- DenseNet
- NASNet
## Dataset
Synthetic dataset is used for the benchmark.
## Callbacks
Two custom callbacks are provided for model benchmarking: ExamplesPerSecondCallback and LoggingMetricCallback. For each callback, `epoch_based` and `batch_based` options are available to set the benchmark level. Check [model_callbacks.py](model_callbacks.py) for more details.
## Running Code
To benchmark a model, use `--model` to specify the model name, and issue the following command:
```
python benchmark_main.py --model=resnet50
```
Arguments:
* `--model`: Which model to be benchmarked. The model name is defined as the keys of `MODELS` in [benchmark_main.py](benchmark_main.py).
* `--callbacks`: To specify a list of callbacks.
Use the `--help` or `-h` flag to get a full list of possible arguments.
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark on the keras built-in application models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# pylint: disable=g-bad-import-order
import numpy as np
from absl import app as absl_app
from absl import flags
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.keras_application_models import dataset
from official.keras_application_models import model_callbacks
from official.utils.flags import core as flags_core
from official.utils.logs import logger
# Define a dictionary that maps model names to their model classes inside Keras.
# Keys are the values accepted by the --model flag; values are the Keras
# application constructors (instantiated with weights=None in the benchmark).
MODELS = {
    "vgg16": tf.keras.applications.VGG16,
    "vgg19": tf.keras.applications.VGG19,
    "inceptionv3": tf.keras.applications.InceptionV3,
    "xception": tf.keras.applications.Xception,
    "resnet50": tf.keras.applications.ResNet50,
    "inceptionresnetv2": tf.keras.applications.InceptionResNetV2,
    "mobilenet": tf.keras.applications.MobileNet,
    "densenet121": tf.keras.applications.DenseNet121,
    "densenet169": tf.keras.applications.DenseNet169,
    "densenet201": tf.keras.applications.DenseNet201,
    # NASNet models are excluded pending the referenced bug fix.
    # TODO(b/80431378)
    # "nasnetlarge": tf.keras.applications.NASNetLarge,
    # "nasnetmobile": tf.keras.applications.NASNetMobile,
}
def run_keras_model_benchmark(_):
  """Run the benchmark on a Keras built-in application model.

  Builds the model selected by --model, trains it on synthetic data for
  --train_epochs epochs, and logs throughput and evaluation metrics through
  the benchmark logger.

  Raises:
    AssertionError: if --model is not a key of `MODELS`.
    ValueError: if a real (non-synthetic) dataset is requested; only
      synthetic data is currently supported.
  """
  # Ensure a valid model name was supplied via command line argument.
  if FLAGS.model not in MODELS:
    raise AssertionError("The --model command line argument should "
                         "be a key in the `MODELS` dictionary.")

  # Load the model without pretrained weights (we only benchmark training).
  tf.logging.info("Benchmark on {} model...".format(FLAGS.model))
  keras_model = MODELS[FLAGS.model]
  model = keras_model(weights=None)

  # Get dataset. One batch is enough since the synthetic dataset repeats.
  dataset_name = "ImageNet"
  if FLAGS.use_synthetic_data:
    tf.logging.info("Using synthetic dataset...")
    dataset_name += "_Synthetic"
    train_num_images = FLAGS.batch_size
    val_num_images = FLAGS.batch_size
    train_dataset = dataset.generate_synthetic_input_dataset(
        FLAGS.model, train_num_images)
    val_dataset = dataset.generate_synthetic_input_dataset(
        FLAGS.model, val_num_images)
  else:
    raise ValueError("Only synthetic dataset is supported!")

  # If run with multiple GPUs, replicate the model across them.
  num_gpus = flags_core.get_num_gpus(FLAGS)
  if num_gpus > 0:
    model = tf.keras.utils.multi_gpu_model(model, gpus=num_gpus)

  # Configure the model.
  model.compile(loss="categorical_crossentropy",
                optimizer="sgd",
                metrics=["accuracy"])

  # Create benchmark logger for benchmark logging.
  run_params = {
      "batch_size": FLAGS.batch_size,
      "synthetic_data": FLAGS.use_synthetic_data,
      "train_epochs": FLAGS.train_epochs,
  }

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info(
      model_name=FLAGS.model,
      dataset_name=dataset_name,
      run_params=run_params,
      test_id=FLAGS.benchmark_test_id)

  # Create callbacks that log metric values about the training and evaluation.
  callbacks = model_callbacks.get_model_callbacks(
      FLAGS.callbacks,
      batch_size=FLAGS.batch_size,
      metric_logger=benchmark_logger)

  # Train and evaluate the model.
  history = model.fit(
      train_dataset,
      epochs=FLAGS.train_epochs,
      callbacks=callbacks,
      validation_data=val_dataset,
      steps_per_epoch=int(np.ceil(train_num_images / FLAGS.batch_size)),
      validation_steps=int(np.ceil(val_num_images / FLAGS.batch_size))
  )

  tf.logging.info("Logging the evaluation results...")
  for epoch in range(FLAGS.train_epochs):
    eval_results = {
        "accuracy": history.history["val_acc"][epoch],
        "loss": history.history["val_loss"][epoch],
        # Cast to int: np.ceil returns a float, but global_step is a step
        # count and should be logged as an integer.
        tf.GraphKeys.GLOBAL_STEP: (epoch + 1) * int(np.ceil(
            train_num_images / FLAGS.batch_size)),
    }
    benchmark_logger.log_evaluation_result(eval_results)

  # Clear the session explicitly to avoid session delete error.
  tf.keras.backend.clear_session()
def define_keras_benchmark_flags():
  """Add flags for keras built-in application models.

  Registers the shared official-model flags, then sets benchmark-specific
  defaults and defines the --model and --callbacks flags. The order matters:
  defaults can only be set after the corresponding flags are defined, and
  adopt_module_key_flags must run after the flags_core definitions.
  """
  flags_core.define_base(hooks=False)
  flags_core.define_performance()
  flags_core.define_image()
  flags_core.define_benchmark()
  # Promote flags_core's key flags so they show up in --help for this module.
  flags.adopt_module_key_flags(flags_core)

  # Benchmark-friendly defaults: synthetic data, small batch, short run.
  flags_core.set_defaults(
      data_format="channels_last",
      use_synthetic_data=True,
      batch_size=32,
      train_epochs=2)

  flags.DEFINE_enum(
      name="model", default=None,
      enum_values=MODELS.keys(), case_sensitive=False,
      help=flags_core.help_wrap(
          "Model to be benchmarked."))

  flags.DEFINE_list(
      name="callbacks",
      default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
      help=flags_core.help_wrap(
          "A list of (case insensitive) strings to specify the names of "
          "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
          "LoggingMetricCallback`"))
def main(_):
  """Entry point: run the benchmark inside a benchmark logging context."""
  benchmark_context = logger.benchmark_context(FLAGS)
  with benchmark_context:
    run_keras_model_benchmark(FLAGS)
if __name__ == "__main__":
  # Surface INFO-level logs so benchmark progress is visible on stdout.
  tf.logging.set_verbosity(tf.logging.INFO)
  define_keras_benchmark_flags()
  FLAGS = flags.FLAGS
  absl_app.run(main)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Prepare dataset for keras model benchmark."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
# Default values for dataset.
_NUM_CHANNELS = 3  # RGB images.
_NUM_CLASSES = 1000  # ImageNet-style 1000-way classification labels.
def _get_default_image_size(model):
"""Provide default image size for each model."""
image_size = (224, 224)
if model in ["inception", "xception", "inceptionresnet"]:
image_size = (299, 299)
elif model in ["nasnetlarge"]:
image_size = (331, 331)
return image_size
def generate_synthetic_input_dataset(model, num_imgs):
  """Build an endlessly repeating dataset of all-zero images and labels.

  Args:
    model: str, model name used to pick the default input resolution.
    num_imgs: int, number of images (and matching labels) in the tensor.

  Returns:
    A tf.data.Dataset that yields the same (images, labels) pair forever.
  """
  height, width = _get_default_image_size(model)
  images = tf.zeros(
      (num_imgs, height, width, _NUM_CHANNELS), dtype=tf.float32)
  labels = tf.zeros((num_imgs, _NUM_CLASSES), dtype=tf.float32)
  return tf.data.Dataset.from_tensors((images, labels)).repeat()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Callbacks for Keras built-in application models.
Note that, in the callbacks, the global_step is initialized in the __init__ of
each callback rather than on_train_begin. As on_train_begin gets called in
the fit_loop, and it will be reset with each call to fit(). To keep the
global_step persistent across all training sessions, it should be initialized in
the __init__.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.utils.logs import logger
# Metrics to log after each batch and epoch.
# Each dict maps the metric key Keras reports in the `logs` argument of the
# callback hooks to the metric name used when logging through the benchmark
# logger.
_PER_BATCH_METRICS = {
    "loss": "train_loss",
    "acc": "train_accuracy",
}
_PER_EPOCH_METRICS = {
    "loss": "train_loss",
    "acc": "train_accuracy",
    "val_loss": "loss",
    "val_acc": "accuracy"
}
class ExamplesPerSecondCallback(tf.keras.callbacks.Callback):
  """Callback that records training throughput in examples per second.

  Logs two metrics every `every_n_steps` batches: the average examples/sec
  since training began, and the current examples/sec over the most recent
  window of `every_n_steps` batches.
  """

  def __init__(self, batch_size, every_n_steps=1, metric_logger=None):
    self._batch_size = batch_size
    self._every_n_steps = every_n_steps
    self._logger = metric_logger or logger.BaseBenchmarkLogger()
    # Initialized here rather than in on_train_begin so the step count
    # persists across repeated calls to fit().
    self._global_step = 0
    super(ExamplesPerSecondCallback, self).__init__()

  def on_train_begin(self, logs=None):
    """Reset the timing baselines when a training session starts."""
    self._train_start_time = time.time()
    self._last_recorded_time = time.time()

  def on_batch_end(self, batch, logs=None):
    """Log the examples_per_sec metrics every_n_steps batches."""
    self._global_step += 1
    now = time.time()
    if self._global_step % self._every_n_steps != 0:
      return
    elapsed_total = now - self._train_start_time
    self._logger.log_metric(
        "average_examples_per_sec",
        self._batch_size * (self._global_step / elapsed_total),
        global_step=self._global_step)
    elapsed_window = now - self._last_recorded_time
    self._logger.log_metric(
        "current_examples_per_sec",
        self._batch_size * (self._every_n_steps / elapsed_window),
        global_step=self._global_step)
    # Start a fresh window for the next current_examples_per_sec reading.
    self._last_recorded_time = now
class LoggingMetricCallback(tf.keras.callbacks.Callback):
  """Callback that logs predefined metric values during training.

  Logs the metrics in `_PER_BATCH_METRICS` after each batch and those in
  `_PER_EPOCH_METRICS` after each epoch, through the benchmark logger.
  """

  def __init__(self, metric_logger=None):
    self._logger = metric_logger or logger.BaseBenchmarkLogger()
    self._per_batch_metrics = _PER_BATCH_METRICS
    self._per_epoch_metrics = _PER_EPOCH_METRICS
    # Initialized here rather than in on_train_begin so the step count
    # persists across repeated calls to fit().
    self._global_step = 0
    super(LoggingMetricCallback, self).__init__()

  def on_batch_end(self, batch, logs=None):
    """Log metrics after each batch."""
    self._global_step += 1
    # Consistency fix: read the instance attribute set in __init__ (the
    # original stored it but then iterated the module-level global).
    for keras_name, log_name in self._per_batch_metrics.items():
      self._logger.log_metric(
          log_name,
          logs.get(keras_name),
          global_step=self._global_step)

  def on_epoch_end(self, epoch, logs=None):
    """Log metrics after each epoch."""
    for keras_name, log_name in self._per_epoch_metrics.items():
      self._logger.log_metric(
          log_name,
          logs.get(keras_name),
          global_step=self._global_step)
def get_model_callbacks(name_list, **kwargs):
  """Factory for getting a list of TensorFlow hooks for training by name.

  Args:
    name_list: a list of strings to name desired callback classes. Allowed:
      ExamplesPerSecondCallback, LoggingMetricCallback, which are defined
      as keys in CALLBACKS.
    **kwargs: a dictionary of arguments to the callbacks.

  Returns:
    list of instantiated callbacks, ready to be used in a classifier.train
    call.

  Raises:
    ValueError: if an unrecognized name is passed.
  """
  if not name_list:
    return []

  instantiated = []
  for raw_name in name_list:
    # Lookup is case-insensitive and tolerant of surrounding whitespace.
    factory = CALLBACKS.get(raw_name.strip().lower())
    if factory is None:
      raise ValueError(
          "Unrecognized training callback requested: {}".format(raw_name))
    instantiated.append(factory(**kwargs))
  return instantiated
def get_examples_per_second_callback(
    every_n_steps=1, batch_size=32, metric_logger=None, **kwargs):  # pylint: disable=unused-argument
  """Function to get ExamplesPerSecondCallback."""
  # Fall back to the global benchmark logger when none is supplied.
  effective_logger = metric_logger or logger.get_benchmark_logger()
  return ExamplesPerSecondCallback(
      batch_size=batch_size,
      every_n_steps=every_n_steps,
      metric_logger=effective_logger)
def get_logging_metric_callback(metric_logger=None, **kwargs):  # pylint: disable=unused-argument
  """Function to get LoggingMetricCallback."""
  # Fall back to the global benchmark logger when none is supplied.
  effective_logger = metric_logger or logger.get_benchmark_logger()
  return LoggingMetricCallback(metric_logger=effective_logger)
# A dictionary to map the callback name and its corresponding function.
# Keys are lower-cased callback names (matching how get_model_callbacks
# normalizes the --callbacks flag values); values are the factory functions.
CALLBACKS = {
    "examplespersecondcallback": get_examples_per_second_callback,
    "loggingmetriccallback": get_logging_metric_callback,
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment