Unverified Commit 8390b362 authored by Toby Boyd, committed by GitHub

add log_steps with faster logging for 8xGPU. (#7274)

parent 64d6c094
@@ -220,6 +220,7 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
     def_flags['training_data'] = self.train_data
     def_flags['model_dir'] = ''
     def_flags['train_epochs'] = 4
+    def_flags['log_steps'] = 50
     super(ShakespeareKerasBenchmarkReal, self).__init__(
         output_dir=output_dir,
@@ -287,6 +288,7 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
     self._setup()
     FLAGS.num_gpus = 8
     FLAGS.batch_size = 64 * 8
+    FLAGS.log_steps = 10
     self._run_and_report_benchmark()

   def benchmark_xla_8_gpu(self):
@@ -294,10 +296,11 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
     self._setup()
     FLAGS.num_gpus = 1
     FLAGS.batch_size = 64 * 8
+    FLAGS.log_steps = 10
     FLAGS.enable_xla = True
     self._run_and_report_benchmark()

   def _run_and_report_benchmark(self):
     """Run and report benchmark."""
     super(ShakespeareKerasBenchmarkReal, self)._run_and_report_benchmark(
-        top_1_train_min=None)
+        top_1_train_min=None, log_steps=FLAGS.log_steps)
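For scale: at the global batch size these benchmarks use (64 * 8 = 512 examples per step), logging every 10 steps reports throughput about once per 5,120 examples, instead of once per 25,600 examples at the class default of 50 steps set above.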
@@ -70,6 +70,10 @@ def define_flags():
   flags.DEFINE_integer(
       name='predict_length', default=1000,
       help='Length of the predicted text including the context.')
+  flags.DEFINE_integer(
+      name='log_steps', default=100,
+      help='For every log_steps, we log the timing information such as '
+      'examples per second.')
   flags.DEFINE_string(
       name='training_data', default=None,
       help='Path to file containing the training data.')
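The new flag behaves like the other flags defined in define_flags() and can be overridden on the command line. A hypothetical invocation (the entry-point name and data path are assumptions, not part of this commit):

    python shakespeare_main.py --training_data=/path/to/shakespeare.txt \
        --train_epochs=4 --batch_size=64 --log_steps=10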
@@ -171,7 +175,8 @@ def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
         filepath=checkpoint_prefix,
         save_weights_only=True)
     callbacks.append(checkpoint_callback)
-  time_callback = keras_utils.TimeHistory(flags_obj.batch_size, 100)
+  time_callback = keras_utils.TimeHistory(flags_obj.batch_size,
+                                          flags_obj.log_steps)
   callbacks.append(time_callback)
   history = model.fit(dataset,
                       epochs=flags_obj.train_epochs,
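The keras_utils.TimeHistory callback above is what consumes these two arguments. Below is a minimal sketch of the behaviour they imply, written as an illustration only and not as the actual TimeHistory implementation: a Keras callback that reports examples per second once every log_steps training batches.

# Illustrative sketch only -- not the actual keras_utils.TimeHistory code.
import time

import tensorflow as tf


class SimpleTimeHistory(tf.keras.callbacks.Callback):
  """Reports examples/sec once every `log_steps` training batches."""

  def __init__(self, batch_size, log_steps):
    super(SimpleTimeHistory, self).__init__()
    self.batch_size = batch_size  # examples processed per step
    self.log_steps = log_steps    # how often to report timing
    self.global_steps = 0
    self.window_start = None

  def on_batch_begin(self, batch, logs=None):
    self.global_steps += 1
    if self.window_start is None:
      self.window_start = time.time()

  def on_batch_end(self, batch, logs=None):
    if self.global_steps % self.log_steps == 0:
      elapsed = time.time() - self.window_start
      examples_per_sec = self.batch_size * self.log_steps / elapsed
      print('step %d: %.1f examples/sec' % (self.global_steps,
                                            examples_per_sec))
      self.window_start = None  # start timing the next window

A smaller log_steps gives more frequent throughput readings, which matches the commit's stated intent of faster logging for the 8-GPU runs.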