Unverified Commit f4b02d15 authored by Toby Boyd, committed by GitHub

Move Keras Hook to use global step to resolve issues across epochs. (#7186)

* Move to global_step.

* Hook to use global_step.

* Fix comment: logging starts at step 1, not step 0.

* Remove hack used for testing.

* Add docstring.
parent 9d53a513
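
Background for the fix: Keras passes an epoch-local batch index to `on_batch_begin`/`on_batch_end`, so the `batch` argument resets to 0 at every epoch boundary. A hook that keys its logging on `batch % log_steps` therefore re-triggers and mis-measures across epochs; keeping a counter that survives epoch boundaries (the `global_steps` introduced below) resolves this. A minimal, hypothetical probe illustrating the difference follows; the `StepProbe` name, toy model, and data are illustrative, not part of this commit:

    import numpy as np
    import tensorflow as tf

    class StepProbe(tf.keras.callbacks.Callback):
      """Contrasts the epoch-local `batch` argument with a global counter."""

      def __init__(self):
        super(StepProbe, self).__init__()
        self.global_steps = 0

      def on_batch_end(self, batch, logs=None):
        self.global_steps += 1
        # `batch` restarts at 0 each epoch; `global_steps` keeps counting.
        print('epoch-local batch=%d, global step=%d'
              % (batch, self.global_steps))

    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer='sgd', loss='mse')
    x = np.random.rand(8, 4).astype('float32')
    y = np.random.rand(8, 1).astype('float32')
    # Two epochs of two steps each: `batch` prints 0, 1, 0, 1 while
    # `global_steps` prints 1, 2, 3, 4.
    model.fit(x, y, batch_size=4, epochs=2, callbacks=[StepProbe()], verbose=0)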
...
@@ -22,10 +22,8 @@ import time
 from absl import flags
 import tensorflow as tf  # pylint: disable=g-bad-import-order
 
-from official.resnet import cifar10_main as cifar_main
 from official.resnet.keras import keras_benchmark
 from official.resnet.keras import keras_cifar_main
-from official.resnet.keras import keras_common
 
 MIN_TOP_1_ACCURACY = 0.925
 MAX_TOP_1_ACCURACY = 0.938
...
@@ -46,41 +46,37 @@ class TimeHistory(tf.keras.callbacks.Callback):
     Args:
       batch_size: Total batch size.
       log_steps: Interval of time history logs.
     """
     self.batch_size = batch_size
     super(TimeHistory, self).__init__()
     self.log_steps = log_steps
+    self.global_steps = 0
 
-    # Logs start of step 0 then end of each step based on log_steps interval.
+    # Logs start of step 1 then end of each step based on log_steps interval.
     self.timestamp_log = []
 
-  def on_train_begin(self, logs=None):
-    self.record_batch = True
-
   def on_train_end(self, logs=None):
     self.train_finish_time = time.time()
 
   def on_batch_begin(self, batch, logs=None):
-    if self.record_batch:
-      timestamp = time.time()
-      self.start_time = timestamp
-      self.record_batch = False
-      if batch == 0:
-        self.timestamp_log.append(BatchTimestamp(batch, timestamp))
+    self.global_steps += 1
+    if self.global_steps == 1:
+      self.start_time = time.time()
+      self.timestamp_log.append(BatchTimestamp(self.global_steps,
+                                               self.start_time))
 
   def on_batch_end(self, batch, logs=None):
-    if batch % self.log_steps == 0:
+    """Records elapse time of the batch and calculates examples per second."""
+    if self.global_steps % self.log_steps == 0:
       timestamp = time.time()
       elapsed_time = timestamp - self.start_time
       examples_per_second = (self.batch_size * self.log_steps) / elapsed_time
-      if batch != 0:
-        self.record_batch = True
-        self.timestamp_log.append(BatchTimestamp(batch, timestamp))
-        tf.compat.v1.logging.info(
-            "BenchmarkMetric: {'num_batches':%d, 'time_taken': %f,"
-            "'examples_per_second': %f}" %
-            (batch, elapsed_time, examples_per_second))
+      self.timestamp_log.append(BatchTimestamp(self.global_steps, timestamp))
+      tf.compat.v1.logging.info(
+          "BenchmarkMetric: {'global step':%d, 'time_taken': %f,"
+          "'examples_per_second': %f}" %
+          (self.global_steps, elapsed_time, examples_per_second))
+      self.start_time = timestamp
 
 
 def get_profiler_callback(model_dir, profile_steps, enable_tensorboard):
...
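
For completeness, a sketch of how the updated hook is consumed, assuming `TimeHistory` is exposed from `official.resnet.keras.keras_common` (consistent with the import removed in the first hunk); the toy model and data are illustrative:

    import numpy as np
    import tensorflow as tf

    from official.resnet.keras import keras_common

    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer='sgd', loss='mse')

    x = np.random.rand(64, 4).astype('float32')
    y = np.random.rand(64, 1).astype('float32')

    # batch_size passed to the hook should match model.fit's batch_size so
    # the examples_per_second calculation is meaningful.
    time_callback = keras_common.TimeHistory(batch_size=8, log_steps=4)

    # Two epochs of 8 steps each: with the global step counter, the
    # BenchmarkMetric log lines report steps 4, 8, 12, 16 instead of
    # restarting the step count at each epoch boundary.
    model.fit(x, y, batch_size=8, epochs=2,
              callbacks=[time_callback], verbose=0)

    print(len(time_callback.timestamp_log))  # BatchTimestamp entries recorded.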