Unverified Commit e0a2b8c3 authored by Toby Boyd, committed by GitHub

Refactor and add benchmarks as well as accuracy tests for GPU and CPU (#7248)

* Added benchmarks and common flags.

* Add cpu tests.

* Add tracking epoch times.

* fix transformer.

* Add examples_per_second.

* fix pylint
parent 63605b95
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Shakespeare (LSTM) benchmark and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time

from absl import flags

from official.staging.shakespeare import shakespeare_main
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
from official.utils.testing.perfzero_benchmark import PerfZeroBenchmark

SHAKESPEARE_TRAIN_DATA = 'shakespeare/shakespeare.txt'

FLAGS = flags.FLAGS


class ShakespeareBenchmarkBase(PerfZeroBenchmark):
  """Base class for Shakespeare (LSTM) benchmark and accuracy tests."""

  def __init__(self, output_dir=None, default_flags=None, root_data_dir=None):
    super(ShakespeareBenchmarkBase, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        flag_methods=[shakespeare_main.define_flags])

  def _run_and_report_benchmark(self,
                                top_1_train_min=0.923,
                                top_1_train_max=0.93,
                                warmup=1,
                                log_steps=100):
    """Report benchmark results by writing to a local protobuf file.

    The average epoch time is computed after skipping the first epoch; it only
    covers the time between each epoch's begin and end callbacks, so time spent
    between epochs is excluded. To skip the accuracy check, set
    `top_1_train_min=None`.

    Args:
      top_1_train_min: lowest passing value.
      top_1_train_max: highest passing value.
      warmup: number of entries in `timestamp_log` to ignore.
      log_steps: interval (in steps) at which entries were added to
        `timestamp_log`.
    """
    total_batch_size = FLAGS.batch_size
    metrics = []
    start_time_sec = time.time()
    stats = shakespeare_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    if top_1_train_min:
      metrics.append({'name': 'accuracy_top_1_train',
                      'value': stats['history']['RecallAt1'][-1],
                      'min_value': top_1_train_min,
                      'max_value': top_1_train_max})

    # Look for the TimeHistory callback that was used during keras.fit.
    for callback in stats['callbacks']:
      if isinstance(callback, keras_utils.TimeHistory):
        epoch_timings = callback.epoch_runtime_log
        average_time = sum(epoch_timings[1:]) / len(epoch_timings[1:])
        metrics.append({'name': 'avg_epoch_time',
                        'value': average_time})

        # First entry in timestamp_log is the start of step 1. The rest of the
        # entries are the end of each recorded step.
        time_log = callback.timestamp_log
        elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
        num_examples = (
            total_batch_size * log_steps * (len(time_log) - warmup - 1))
        examples_per_sec = num_examples / elapsed
        metrics.append({'name': 'exp_per_second',
                        'value': examples_per_sec})

    flags_str = flags_core.get_nondefault_flags_as_str()
    self.report_benchmark(iters=-1, wall_time=wall_time_sec,
                          metrics=metrics,
                          extras={'flags': flags_str})
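To make the metric computation above concrete, here is the same arithmetic on made-up numbers; in a real run the values come from the `keras_utils.TimeHistory` callback attached during `model.fit` (its log stores objects with a `.timestamp` field, so plain floats stand in here purely for illustration):

total_batch_size = 64
log_steps = 100
warmup = 1

# Start of step 1, then the end of every 100th step (made-up timestamps).
timestamps = [0.0, 20.0, 40.0, 60.0]

elapsed = timestamps[-1] - timestamps[warmup]                  # 40.0 seconds
num_examples = total_batch_size * log_steps * (len(timestamps) - warmup - 1)
examples_per_sec = num_examples / elapsed                      # 12800 / 40 = 320

# Per-epoch runtimes in seconds; the first (warm-up) epoch is skipped.
epoch_timings = [95.0, 61.0, 60.0, 62.0]
avg_epoch_time = sum(epoch_timings[1:]) / len(epoch_timings[1:])  # 61.0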


class ShakespeareAccuracy(ShakespeareBenchmarkBase):
  """Shakespeare accuracy tests.

  This is not an ideal test. The best available accuracy check is to validate
  top_1 on the training set. At batch size 64 the top_1 training accuracy
  stabilizes at ~0.92 around epochs 40-45.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Shakespeare accuracy tests.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)
    super(ShakespeareAccuracy, self).__init__(
        output_dir=output_dir, root_data_dir=root_data_dir)

  def benchmark_cpu(self):
    """Benchmark cpu."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.training_data = self.train_data
    FLAGS.batch_size = 64
    FLAGS.train_epochs = 43
    FLAGS.model_dir = ''
    self._run_and_report_benchmark()

  def benchmark_cpu_no_ds_run_eagerly(self):
    """Benchmark cpu without distribution strategy, running eagerly."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.training_data = self.train_data
    FLAGS.batch_size = 64
    FLAGS.train_epochs = 43
    FLAGS.model_dir = ''
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Benchmark 1 gpu."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.training_data = self.train_data
    FLAGS.batch_size = 64
    FLAGS.train_epochs = 43
    FLAGS.model_dir = ''
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_ds_run_eagerly(self):
    """Benchmark 1 gpu without distribution strategy, running eagerly."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.training_data = self.train_data
    FLAGS.batch_size = 64
    FLAGS.train_epochs = 43
    FLAGS.model_dir = ''
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Benchmark 1 gpu w/xla."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.training_data = self.train_data
    FLAGS.batch_size = 64
    FLAGS.train_epochs = 43
    FLAGS.model_dir = ''
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Benchmark 8 gpu.

    This test is for accuracy, not scaling. The batch size is not scaled to
    the number of gpus.
    """
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.training_data = self.train_data
    FLAGS.batch_size = 64
    FLAGS.train_epochs = 43
    FLAGS.model_dir = ''
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu(self):
    """Benchmark 8 gpu w/xla.

    This test is for accuracy, not scaling. The batch size is not scaled to
    the number of gpus.
    """
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.training_data = self.train_data
    FLAGS.batch_size = 64
    FLAGS.train_epochs = 43
    FLAGS.model_dir = ''
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()


class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
  """Shakespeare performance benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Benchmark tests w/Keras.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)

    def_flags = {}
    def_flags['training_data'] = self.train_data
    def_flags['model_dir'] = ''
    def_flags['train_epochs'] = 4

    super(ShakespeareKerasBenchmarkReal, self).__init__(
        output_dir=output_dir,
        root_data_dir=root_data_dir,
        default_flags=def_flags)

  def benchmark_cpu(self):
    """Benchmark cpu."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()

  def benchmark_cpu_no_ds_run_eagerly(self):
    """Benchmark cpu without distribution strategy, running eagerly."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.batch_size = 64
    FLAGS.distribution_strategy = 'off'
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_cpu_no_ds(self):
    """Benchmark cpu without distribution strategy."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.batch_size = 64
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Benchmark 1 gpu."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_ds(self):
    """Benchmark 1 gpu without distribution strategy."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_ds_run_eagerly(self):
    """Benchmark 1 gpu without distribution strategy, running eagerly."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Benchmark 1 gpu w/xla."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Benchmark 8 gpu."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 64 * 8
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu(self):
    """Benchmark 8 gpu w/xla."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 64 * 8
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def _run_and_report_benchmark(self):
    """Run and report benchmark; the accuracy check is skipped."""
    super(ShakespeareKerasBenchmarkReal, self)._run_and_report_benchmark(
        top_1_train_min=None)
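These classes are normally driven by the PerfZero harness, but a single benchmark method can also be exercised directly once the official models repository is on PYTHONPATH and the Shakespeare text file is in place. A rough sketch under those assumptions; the paths below are placeholders rather than values from this change, and even the 4-epoch performance run takes several minutes:

# Hypothetical local paths; PerfZero normally supplies these.
root_data_dir = '/tmp/data'          # must contain shakespeare/shakespeare.txt
output_dir = '/tmp/benchmark_logs'

benchmark = ShakespeareKerasBenchmarkReal(output_dir=output_dir,
                                          root_data_dir=root_data_dir)
benchmark.benchmark_1_gpu_no_ds()    # sets FLAGS, trains, reports metrics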
official/staging/shakespeare/shakespeare_main.py
@@ -19,26 +19,48 @@ from __future__ import division
 from __future__ import print_function

 import os

-import numpy as np
+# pylint: disable=wrong-import-order
 from absl import app as absl_app
 from absl import flags
+import numpy as np
 import tensorflow as tf
+# pylint: enable=wrong-import-order

+from official.utils.flags import core as flags_core
+from official.utils.misc import distribution_utils
+from official.utils.misc import keras_utils

-BATCH_SIZE = 64
-EPOCHS = 10
 EMBEDDING_DIM = 256
 RNN_UNITS = 1024
 SEQ_LENGTH = 100
+# Calculated by running batch_size=1
+BATCHES_PER_EPOCH = 11043


 def define_flags():
   """Define the flags for the Shakespeare character LSTM."""
-  flags.DEFINE_string(
-      name='model_dir', default=None,
-      help='Directory for model check points.')
+  flags_core.define_base(data_dir=False,
+                         clean=False,
+                         train_epochs=True,
+                         epochs_between_evals=False,
+                         stop_threshold=False,
+                         hooks=False,
+                         export_dir=False,
+                         run_eagerly=True)
+  flags_core.define_performance(num_parallel_calls=False,
+                                inter_op=False,
+                                intra_op=False,
+                                synthetic_data=False,
+                                max_train_steps=False,
+                                dtype=False,
+                                enable_xla=True)
+  flags_core.set_defaults(train_epochs=43,
+                          batch_size=64)
+
+  flags.DEFINE_boolean(name='enable_eager', default=True, help='Enable eager?')
   flags.DEFINE_boolean(
       name='train', default=True,
       help='If true trains the model.')
@@ -53,18 +75,20 @@ def define_flags():
       help='Path to file containing the training data.')


-def get_dataset(path_to_file, seq_length=SEQ_LENGTH):
+def get_dataset(path_to_file, batch_size=None, seq_length=SEQ_LENGTH):
   """Creates a dataset from a given text file.

   Args:
     path_to_file: The path to the training data.
+    batch_size: Batch size to use.
     seq_length: The length of the LSTM sequence.

   Returns:
     A tuple, consisting of the Dataset and the class to character mapping
     and character to class mapping.
   """
-  text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
+  with open(path_to_file, 'rb') as train_data:
+    text = train_data.read().decode(encoding='utf-8')

   # Create vocab
   vocab = sorted(set(text))
@@ -80,9 +104,9 @@ def get_dataset(path_to_file, seq_length=SEQ_LENGTH):
     input_text = chunk[:-1]
     target_text = chunk[1:]
     return input_text, tf.one_hot(target_text, len(vocab))

   dataset = sequences.map(split_input_target)
-  dataset = dataset.shuffle(10000).batch(BATCH_SIZE, drop_remainder=True)
+  dataset = dataset.shuffle(10000).repeat()
+  dataset = dataset.batch(batch_size, drop_remainder=True)

   return dataset, idx2char, char2idx
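Because the dataset now repeats indefinitely, epoch boundaries come from the explicit steps_per_epoch passed to model.fit in train_model below rather than from dataset exhaustion. A self-contained toy illustration of the same shuffle/repeat/batch pattern; the data here is made up and not part of this change:

import tensorflow as tf

# Toy stand-in for the character dataset: 10 "sequences" of length 4.
toy = tf.data.Dataset.from_tensor_slices(tf.reshape(tf.range(40), (10, 4)))

batch_size = 4
dataset = toy.shuffle(10).repeat()                     # infinite stream
dataset = dataset.batch(batch_size, drop_remainder=True)

steps_per_epoch = 10 // batch_size                     # 2 full batches per "epoch"
for step, batch in enumerate(dataset.take(steps_per_epoch)):
  print(step, batch.shape)                             # (4, 4) each step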
@@ -90,7 +114,7 @@ def get_dataset(path_to_file, seq_length=SEQ_LENGTH):
 def build_model(vocab_size,
                 embedding_dim=EMBEDDING_DIM,
                 rnn_units=RNN_UNITS,
-                batch_size=BATCH_SIZE,
+                batch_size=None,
                 stateful=False):
   """Builds the Shakespeare model.
@@ -115,26 +139,30 @@ def build_model(vocab_size,
       tf.keras.layers.Dense(vocab_size, activation='softmax')])


-def train_model(dataset, vocab_size, checkpoint_dir=None):
+def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
   """Trains a Shakespeare model.

   Args:
+    flags_obj: An object containing parsed flag values.
     dataset: the training data set.
     vocab_size: the number of unique character classes.
+    strategy: distribution strategy to use.
     checkpoint_dir: if not None, the directory in which to make checkpoints.

   Returns:
-    The training history.
+    The training history and callbacks.
   """
-  strategy = tf.distribute.MirroredStrategy()
+  train_steps = BATCHES_PER_EPOCH // flags_obj.batch_size
+  strategy_scope = distribution_utils.get_strategy_scope(strategy)

-  with strategy.scope():
-    model = build_model(vocab_size=vocab_size)
+  with strategy_scope:
+    model = build_model(vocab_size=vocab_size, batch_size=flags_obj.batch_size)
     model.compile(optimizer=tf.keras.optimizers.Adam(),
                   loss=tf.keras.losses.CategoricalCrossentropy(),
                   metrics=[
                       tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
-                      tf.keras.metrics.Recall(top_k=5, name='RecallAt5')])
+                      tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
+                  run_eagerly=flags_obj.run_eagerly)

   callbacks = []
   if checkpoint_dir:
@@ -143,8 +171,14 @@ def train_model(dataset, vocab_size, checkpoint_dir=None):
         filepath=checkpoint_prefix,
         save_weights_only=True)
     callbacks.append(checkpoint_callback)
+  time_callback = keras_utils.TimeHistory(flags_obj.batch_size, 100)
+  callbacks.append(time_callback)

-  return model.fit(dataset, epochs=EPOCHS, callbacks=callbacks)
+  history = model.fit(dataset,
+                      epochs=flags_obj.train_epochs,
+                      steps_per_epoch=train_steps,
+                      callbacks=callbacks,
+                      verbose=2)
+  return history, callbacks


 def make_prediction(checkpoint_dir, length, context, idx2char, char2idx):
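As a quick check of the steps_per_epoch arithmetic above, using the defaults introduced in this change (BATCHES_PER_EPOCH = 11043, batch_size = 64):

BATCHES_PER_EPOCH = 11043   # sequences per epoch, measured at batch_size=1
batch_size = 64             # default set via flags_core.set_defaults

train_steps = BATCHES_PER_EPOCH // batch_size   # 172 steps per epoch
examples_per_epoch = train_steps * batch_size   # 11008; the last 35 sequences are dropped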
@@ -188,18 +222,39 @@ def make_prediction(checkpoint_dir, length, context, idx2char, char2idx):
   return context + ''.join(text_generated)


-def main(_):
-  flags_obj = flags.FLAGS
+def run(flags_obj):
+  """Run Shakespeare training and predict.
+
+  Args:
+    flags_obj: An object containing parsed flag values.
+
+  Returns:
+    Dictionary with status from the run.
+  """
   if not flags_obj.training_data:
     raise ValueError(
         'Must set the path to a training data file. e.g download the following '
         'https://storage.googleapis.com/download.tensorflow.org/data/'
         'shakespeare.txt')

-  dataset, idx2char, char2idx = get_dataset(flags_obj.training_data)
+  keras_utils.set_session_config(
+      enable_eager=flags_obj.enable_eager,
+      enable_xla=flags_obj.enable_xla)
+
+  strategy = distribution_utils.get_distribution_strategy(
+      distribution_strategy=flags_obj.distribution_strategy,
+      num_gpus=flags_obj.num_gpus)
+
+  dataset, idx2char, char2idx = get_dataset(flags_obj.training_data,
+                                            batch_size=flags_obj.batch_size)
+  stats = {}
   if flags_obj.train:
-    train_model(dataset, len(idx2char), flags_obj.model_dir)
+    history, callbacks = train_model(flags_obj, dataset,
+                                     len(idx2char), strategy,
+                                     checkpoint_dir=flags_obj.model_dir)
+    stats['history'] = history.history
+    stats['callbacks'] = callbacks

   if flags_obj.predict_context:
     if not flags_obj.model_dir:
@@ -210,6 +265,13 @@ def main(_):
             idx2char,
             char2idx))

+  return stats
+
+
+def main(_):
+  flags_obj = flags.FLAGS
+  run(flags_obj)
+

 if __name__ == '__main__':
   define_flags()
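Splitting the old main into run() plus a thin main() wrapper is what lets the benchmark classes above call the trainer programmatically and read back the returned stats. A minimal sketch, assuming the official package is importable and flags have not yet been parsed; the flag values below are placeholders:

from absl import flags

from official.staging.shakespeare import shakespeare_main

shakespeare_main.define_flags()
flags.FLAGS(['shakespeare_main',
             '--training_data=/tmp/data/shakespeare/shakespeare.txt',
             '--train_epochs=1',
             '--model_dir='])

stats = shakespeare_main.run(flags.FLAGS)
print(stats['history']['RecallAt1'][-1])   # top-1 recall after the last epoch
print(stats['callbacks'])                  # includes the TimeHistory instance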
official/utils/misc/keras_utils.py
@@ -41,11 +41,11 @@ class TimeHistory(tf.keras.callbacks.Callback):
   """Callback for Keras models."""

   def __init__(self, batch_size, log_steps):
-    """Callback for logging performance (# examples/second).
+    """Callback for logging performance.

     Args:
       batch_size: Total batch size.
-      log_steps: Interval of time history logs.
+      log_steps: Interval of steps between logging of batch level stats.
     """
     self.batch_size = batch_size
     super(TimeHistory, self).__init__()
@@ -55,9 +55,15 @@ class TimeHistory(tf.keras.callbacks.Callback):
     # Logs start of step 1 then end of each step based on log_steps interval.
     self.timestamp_log = []

+    # Records the time each epoch takes to run from start to finish of epoch.
+    self.epoch_runtime_log = []
+
   def on_train_end(self, logs=None):
     self.train_finish_time = time.time()

+  def on_epoch_begin(self, epoch, logs=None):
+    self.epoch_start = time.time()
+
   def on_batch_begin(self, batch, logs=None):
     self.global_steps += 1
     if self.global_steps == 1:
@@ -78,6 +84,13 @@ class TimeHistory(tf.keras.callbacks.Callback):
           (self.global_steps, elapsed_time, examples_per_second))
       self.start_time = timestamp

+  def on_epoch_end(self, epoch, logs=None):
+    epoch_run_time = time.time() - self.epoch_start
+    self.epoch_runtime_log.append(epoch_run_time)
+    tf.compat.v1.logging.info(
+        "BenchmarkMetric: {'epoch':%d, 'time_taken': %f}" %
+        (epoch, epoch_run_time))
+

 def get_profiler_callback(model_dir, profile_steps, enable_tensorboard):
   """Validate profile_steps flag value and return profiler callback."""