Unverified Commit 8390b362 authored by Toby Boyd, committed by GitHub

add log_steps with faster logging for 8xGPU. (#7274)

parent 64d6c094
@@ -220,6 +220,7 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
     def_flags['training_data'] = self.train_data
     def_flags['model_dir'] = ''
     def_flags['train_epochs'] = 4
+    def_flags['log_steps'] = 50
     super(ShakespeareKerasBenchmarkReal, self).__init__(
         output_dir=output_dir,
@@ -287,6 +288,7 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
     self._setup()
     FLAGS.num_gpus = 8
     FLAGS.batch_size = 64 * 8
+    FLAGS.log_steps = 10
     self._run_and_report_benchmark()

   def benchmark_xla_8_gpu(self):
@@ -294,10 +296,11 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
     self._setup()
     FLAGS.num_gpus = 1
     FLAGS.batch_size = 64 * 8
+    FLAGS.log_steps = 10
     FLAGS.enable_xla = True
     self._run_and_report_benchmark()

   def _run_and_report_benchmark(self):
     """Run and report benchmark."""
     super(ShakespeareKerasBenchmarkReal, self)._run_and_report_benchmark(
-        top_1_train_min=None)
+        top_1_train_min=None, log_steps=FLAGS.log_steps)
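For scale: at the global batch size these benchmarks use (64 * 8 = 512 examples per step), logging every 10 steps reports throughput about once per 5,120 examples, instead of once per 25,600 examples at the class default of 50 steps set above.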
@@ -70,6 +70,10 @@ def define_flags():
   flags.DEFINE_integer(
       name='predict_length', default=1000,
       help='Length of the predicted text including the context.')
+  flags.DEFINE_integer(
+      name='log_steps', default=100,
+      help='For every log_steps, we log the timing information such as '
+      'examples per second.')
   flags.DEFINE_string(
       name='training_data', default=None,
       help='Path to file containing the training data.')
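The new flag behaves like the other flags defined in define_flags() and can be overridden on the command line. A hypothetical invocation (the entry-point name and data path are assumptions, not part of this commit):

    python shakespeare_main.py --training_data=/path/to/shakespeare.txt \
        --train_epochs=4 --batch_size=64 --log_steps=10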
@@ -171,7 +175,8 @@ def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
         filepath=checkpoint_prefix,
         save_weights_only=True)
     callbacks.append(checkpoint_callback)
-  time_callback = keras_utils.TimeHistory(flags_obj.batch_size, 100)
+  time_callback = keras_utils.TimeHistory(flags_obj.batch_size,
+                                          flags_obj.log_steps)
   callbacks.append(time_callback)
   history = model.fit(dataset,
                       epochs=flags_obj.train_epochs,
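The keras_utils.TimeHistory callback above is what consumes these two arguments. Below is a minimal sketch of the behaviour they imply, written as an illustration only and not as the actual TimeHistory implementation: a Keras callback that reports examples per second once every log_steps training batches.

# Illustrative sketch only -- not the actual keras_utils.TimeHistory code.
import time

import tensorflow as tf


class SimpleTimeHistory(tf.keras.callbacks.Callback):
  """Reports examples/sec once every `log_steps` training batches."""

  def __init__(self, batch_size, log_steps):
    super(SimpleTimeHistory, self).__init__()
    self.batch_size = batch_size  # examples processed per step
    self.log_steps = log_steps    # how often to report timing
    self.global_steps = 0
    self.window_start = None

  def on_batch_begin(self, batch, logs=None):
    self.global_steps += 1
    if self.window_start is None:
      self.window_start = time.time()

  def on_batch_end(self, batch, logs=None):
    if self.global_steps % self.log_steps == 0:
      elapsed = time.time() - self.window_start
      examples_per_sec = self.batch_size * self.log_steps / elapsed
      print('step %d: %.1f examples/sec' % (self.global_steps,
                                            examples_per_sec))
      self.window_start = None  # start timing the next window

A smaller log_steps gives more frequent throughput readings, which matches the commit's stated intent of faster logging for the 8-GPU runs.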