"...git@developer.sourcefind.cn:OpenDAS/mmdetection3d.git" did not exist on "c3ef75bfa31870e0e343b57b0c100eabb47d803f"
Unverified commit b9b44f7b, authored by Qianli Scott Zhu and committed by GitHub

Resnet benchmark logging (#3704)

* Update resnet model for benchmark logging.

To enable benchmark logging, just add "--hooks LoggingMetricHook"
(a usage sketch follows these notes).

* Benchmark logger fix for resnet.

1. Update default at_end to False for metric logger to avoid
checkpoint error.
2. Update resnet run to log final evaluation result.

* Update log output for final eval_result.

* Typo fix.

* Unset the default value for benchmark_log_dir.

Usually the benchmark should be logged to a different directory for
each run. Having a default value would hide that choice from the user.

* Bug fix for benchmark logger initialization.

* Fix lint error.

* Address the review comment.

1. Update the logger to cover evaluation result.
2. Move the flag to performance parser.

* Undo the change for arg_parser.
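
For orientation, here is a condensed, hypothetical wrapper showing how the flags and helpers introduced in this commit fit together. The module paths come from the diffs below; the script name, wrapper function, and placeholder eval_results are illustrative only, not part of the change.

    # Hypothetical end-to-end sketch (not the real resnet.py wiring).
    import argparse

    from official.utils.arg_parsers import parsers
    from official.utils.logging import hooks_helper
    from official.utils.logging import logger


    def main(argv=None):
      # Typical invocation once this change is in (script name illustrative):
      #   python resnet_run.py --hooks LoggingMetricHook \
      #       --benchmark_log_dir /tmp/resnet_benchmark
      parser = argparse.ArgumentParser(parents=[
          parsers.BaseParser(),       # supplies --hooks, among others
          parsers.BenchmarkParser(),  # supplies --benchmark_log_dir (no default)
      ])
      flags = parser.parse_args(argv)

      # get_train_hooks forwards keyword arguments to each hook factory;
      # LoggingMetricHook needs benchmark_log_dir and raises ValueError without it.
      train_hooks = hooks_helper.get_train_hooks(
          flags.hooks, benchmark_log_dir=flags.benchmark_log_dir)

      # ... estimator.train(..., hooks=train_hooks) and estimator.evaluate() ...
      eval_results = {"loss": 0.0, "accuracy": 0.0, "global_step": 0}  # placeholder

      if flags.benchmark_log_dir is not None:
        benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
        benchmark_logger.log_estimator_evaluation_result(eval_results)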
parent 8652f38d
@@ -31,6 +31,7 @@ import tensorflow as tf  # pylint: disable=g-bad-import-order
 from official.resnet import resnet_model
 from official.utils.arg_parsers import parsers
 from official.utils.logging import hooks_helper
+from official.utils.logging import logger

 ################################################################################
@@ -349,7 +350,9 @@ def resnet_main(flags, model_function, input_function):
   for _ in range(flags.train_epochs // flags.epochs_between_evals):
     train_hooks = hooks_helper.get_train_hooks(
-        flags.hooks, batch_size=flags.batch_size)
+        flags.hooks,
+        batch_size=flags.batch_size,
+        benchmark_log_dir=flags.benchmark_log_dir)

     print('Starting a training cycle.')
@@ -377,6 +380,10 @@ def resnet_main(flags, model_function, input_function):
         steps=flags.max_train_steps)
     print(eval_results)
+
+    if flags.benchmark_log_dir is not None:
+      benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
+      benchmark_logger.log_estimator_evaluation_result(eval_results)


 class ResnetArgParser(argparse.ArgumentParser):
   """Arguments for configuring and running a Resnet Model.
@@ -387,6 +394,7 @@ class ResnetArgParser(argparse.ArgumentParser):
         parsers.BaseParser(),
         parsers.PerformanceParser(),
         parsers.ImageModelParser(),
+        parsers.BenchmarkParser(),
     ])

     self.add_argument(
......
@@ -131,7 +131,7 @@ class BaseParser(argparse.ArgumentParser):
             "of train hooks. "
             "Example: --hooks LoggingTensorHook ExamplesPerSecondHook. "
             "Allowed hook names (case-insensitive): LoggingTensorHook, "
-            "ProfilerHook, ExamplesPerSecondHook. "
+            "ProfilerHook, ExamplesPerSecondHook, LoggingMetricHook. "
             "See official.utils.logging.hooks_helper for details.",
        metavar="<HK>"
    )
@@ -224,3 +224,21 @@ class ImageModelParser(argparse.ArgumentParser):
            "was built for CPU or GPU.",
        metavar="<CF>"
    )
+
+
+class BenchmarkParser(argparse.ArgumentParser):
+  """Default parser for benchmark logging.
+
+  Args:
+    add_help: Create the "--help" flag. False if class instance is a parent.
+    benchmark_log_dir: Create a flag to specify location for benchmark logging.
+  """
+
+  def __init__(self, add_help=False, benchmark_log_dir=True):
+    super(BenchmarkParser, self).__init__(add_help=add_help)
+
+    if benchmark_log_dir:
+      self.add_argument(
+          "--benchmark_log_dir", "-bld", default=None,
+          help="[default: %(default)s] The location of the benchmark logging.",
+          metavar="<BLD>"
+      )
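
As a quick sanity check, the new flag can be exercised on its own; the standalone snippet below is illustrative only and assumes nothing beyond the BenchmarkParser class added above.

    from official.utils.arg_parsers import parsers

    # Standalone use of the parser added above (normally it is composed as an
    # argparse parent, as the resnet and test diffs show).
    parser = parsers.BenchmarkParser(add_help=True)

    flags = parser.parse_args(["--benchmark_log_dir", "/tmp/benchmark"])
    print(flags.benchmark_log_dir)                  # /tmp/benchmark

    # With no flag given there is deliberately no default directory.
    print(parser.parse_args([]).benchmark_log_dir)  # None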
@@ -28,7 +28,8 @@ class TestParser(argparse.ArgumentParser):
         parsers.BaseParser(),
         parsers.PerformanceParser(num_parallel_calls=True, inter_op=True,
                                   intra_op=True, use_synthetic_data=True),
-        parsers.ImageModelParser(data_format=True)
+        parsers.ImageModelParser(data_format=True),
+        parsers.BenchmarkParser(benchmark_log_dir=True)
     ])
@@ -58,6 +59,19 @@ class BaseTester(unittest.TestCase):
     for key, value in defaults.items():
       assert namespace_vars[key] == value

+  def test_benchmark_setting(self):
+    defaults = dict(
+        hooks=["LoggingMetricHook"],
+        benchmark_log_dir="/tmp/12345"
+    )
+
+    parser = TestParser()
+    parser.set_defaults(**defaults)
+    namespace_vars = vars(parser.parse_args([]))
+    for key, value in defaults.items():
+      assert namespace_vars[key] == value
+
   def test_booleans(self):
     """Test to ensure boolean flags trigger as expected.
     """
......
@@ -27,6 +27,7 @@ from __future__ import print_function
 import tensorflow as tf  # pylint: disable=g-bad-import-order

 from official.utils.logging import hooks
+from official.utils.logging import metric_hook

 _TENSORS_TO_LOG = dict((x, x) for x in ['learning_rate',
                                         'cross_entropy',
@@ -122,9 +123,37 @@ def get_examples_per_second_hook(every_n_steps=100,
                                       warm_steps=warm_steps)


+def get_logging_metric_hook(benchmark_log_dir=None,
+                            tensors_to_log=None,
+                            every_n_secs=600,
+                            **kwargs):  # pylint: disable=unused-argument
+  """Function to get LoggingMetricHook.
+
+  Args:
+    benchmark_log_dir: `string`, directory path to save the metric log.
+    tensors_to_log: List of tensor names or dictionary mapping labels to tensor
+      names. If not set, log _TENSORS_TO_LOG by default.
+    every_n_secs: `int`, the frequency for logging the metric. Default to every
+      10 mins.
+
+  Returns:
+    Returns a LoggingMetricHook that periodically writes the given tensors to
+    the benchmark log directory.
+  """
+  if benchmark_log_dir is None:
+    raise ValueError("benchmark_log_dir should be provided to use the metric "
+                     "logger")
+  if tensors_to_log is None:
+    tensors_to_log = _TENSORS_TO_LOG
+  return metric_hook.LoggingMetricHook(
+      tensors=tensors_to_log,
+      log_dir=benchmark_log_dir,
+      every_n_secs=every_n_secs)
+
+
 # A dictionary to map one hook name and its corresponding function
 HOOKS = {
     'loggingtensorhook': get_logging_tensor_hook,
     'profilerhook': get_profiler_hook,
     'examplespersecondhook': get_examples_per_second_hook,
+    'loggingmetrichook': get_logging_metric_hook,
 }
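
The HOOKS mapping is what lets a "--hooks" name reach get_logging_metric_hook. Below is a simplified sketch of that dispatch for illustration only; the real get_train_hooks is defined earlier in hooks_helper.py and may differ in details.

    def get_train_hooks_sketch(name_list, **kwargs):
      """Illustrative restatement of the HOOKS-based dispatch (not the real code)."""
      train_hooks = []
      for name in name_list or []:
        hook_name = name.strip().lower()  # hook names are matched case-insensitively
        if hook_name not in HOOKS:
          raise ValueError("Unrecognized training hook requested: {}".format(name))
        # Each factory accepts **kwargs, so extras such as batch_size or
        # benchmark_log_dir are ignored by hooks that do not need them.
        train_hooks.append(HOOKS[hook_name](**kwargs))
      return train_hooks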
@@ -49,16 +49,19 @@ class BaseTest(unittest.TestCase):
                      expected_hook_name)

   def test_get_train_hooks_logging_tensor_hook(self):
-    test_hook_name = 'LoggingTensorHook'
-    self.validate_train_hook_name(test_hook_name, 'loggingtensorhook')
+    self.validate_train_hook_name('LoggingTensorHook', 'loggingtensorhook')

   def test_get_train_hooks_profiler_hook(self):
-    test_hook_name = 'ProfilerHook'
-    self.validate_train_hook_name(test_hook_name, 'profilerhook')
+    self.validate_train_hook_name('ProfilerHook', 'profilerhook')

   def test_get_train_hooks_examples_per_second_hook(self):
-    test_hook_name = 'ExamplesPerSecondHook'
-    self.validate_train_hook_name(test_hook_name, 'examplespersecondhook')
+    self.validate_train_hook_name('ExamplesPerSecondHook',
+                                  'examplespersecondhook')
+
+  def test_get_logging_metric_hook(self):
+    test_hook_name = 'LoggingMetricHook'
+    self.validate_train_hook_name(test_hook_name, 'loggingmetrichook',
+                                  benchmark_log_dir='/tmp')

 if __name__ == '__main__':
   tf.test.main()
@@ -37,6 +37,25 @@ class BenchmarkLogger(object):
     if not tf.gfile.IsDirectory(self._logging_dir):
       tf.gfile.MakeDirs(self._logging_dir)

+  def log_estimator_evaluation_result(self, eval_results):
+    """Log the evaluation result for an estimator.
+
+    The evaluate result is a dictionary that contains metrics defined in
+    model_fn. It also contains an entry for global_step, which holds the value
+    of the global step when evaluation was performed.
+
+    Args:
+      eval_results: dict, the result of evaluate() from an estimator.
+    """
+    if not isinstance(eval_results, dict):
+      tf.logging.warning("eval_results should be a dictionary for logging. "
+                         "Got %s", type(eval_results))
+      return
+    global_step = eval_results[tf.GraphKeys.GLOBAL_STEP]
+    for key in eval_results:
+      if key != tf.GraphKeys.GLOBAL_STEP:
+        self.log_metric(key, eval_results[key], global_step=global_step)
+
   def log_metric(self, name, value, unit=None, global_step=None, extras=None):
     """Log the benchmark metric information to local file.
......
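
The tests in the next file read metric.log back as newline-delimited JSON records with name, value, unit, and global_step fields. A small hypothetical reader, assuming only that layout:

    import json
    import os

    import tensorflow as tf


    def read_metric_log(log_dir):
      # Hypothetical helper: yields one dict per JSON line from
      # <log_dir>/metric.log, matching the layout the tests below assert on.
      metric_log = os.path.join(log_dir, "metric.log")
      with tf.gfile.GFile(metric_log) as f:
        for line in f:
          line = line.strip()
          if line:
            yield json.loads(line)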
@@ -87,5 +87,37 @@ class BenchmarkLoggerTest(tf.test.TestCase):
     metric_log = os.path.join(log_dir, "metric.log")
     self.assertFalse(tf.gfile.Exists(metric_log))

+  def test_log_evaluation_result(self):
+    eval_result = {'loss': 0.46237424,
+                   'global_step': 207082,
+                   'accuracy': 0.9285}
+    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
+    log = logger.BenchmarkLogger(log_dir)
+    log.log_estimator_evaluation_result(eval_result)
+
+    metric_log = os.path.join(log_dir, "metric.log")
+    self.assertTrue(tf.gfile.Exists(metric_log))
+    with tf.gfile.GFile(metric_log) as f:
+      loss = json.loads(f.readline())
+      self.assertEqual(loss["name"], "loss")
+      self.assertEqual(loss["value"], 0.46237424)
+      self.assertEqual(loss["unit"], None)
+      self.assertEqual(loss["global_step"], 207082)
+
+      accuracy = json.loads(f.readline())
+      self.assertEqual(accuracy["name"], "accuracy")
+      self.assertEqual(accuracy["value"], 0.9285)
+      self.assertEqual(accuracy["unit"], None)
+      self.assertEqual(accuracy["global_step"], 207082)
+
+  def test_log_evaluation_result_with_invalid_type(self):
+    eval_result = "{'loss': 0.46237424, 'global_step': 207082}"
+    log_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
+    log = logger.BenchmarkLogger(log_dir)
+    log.log_estimator_evaluation_result(eval_result)
+
+    metric_log = os.path.join(log_dir, "metric.log")
+    self.assertFalse(tf.gfile.Exists(metric_log))
+
 if __name__ == "__main__":
   tf.test.main()