Commit 2d05fc8a authored by Jose Baiocchi, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 307878379
parent 05feb2be
@@ -19,23 +19,21 @@ from __future__ import division
 from __future__ import print_function

 # pylint: disable=g-bad-import-order
-import copy
 import json
-import os
 import time

 from absl import flags
-from absl import logging
 from absl.testing import flagsaver
 import tensorflow as tf
 # pylint: enable=g-bad-import-order

-from official.benchmark import bert_benchmark_utils as benchmark_utils
-from official.utils.flags import core as flags_core
 from official.benchmark import benchmark_wrappers
+from official.benchmark import perfzero_benchmark
+from official.utils.flags import core as flags_core
+from official.utils.misc import keras_utils
 from official.vision.detection import main as detection
+from official.vision.detection.configs import base_config

-TMP_DIR = os.getenv('TMPDIR')
 FLAGS = flags.FLAGS

 # pylint: disable=line-too-long
@@ -46,51 +44,41 @@ RESNET_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/retinanet/resnet50-checkpoi
 # pylint: enable=line-too-long


-class DetectionBenchmarkBase(tf.test.Benchmark):
-  """Base class to hold methods common to test classes."""
-  local_flags = None
+class TimerCallback(keras_utils.TimeHistory):
+  """TimeHistory subclass for benchmark reporting."""

-  def __init__(self, output_dir=None):
-    self.num_gpus = 8
+  def get_examples_per_sec(self, warmup=1):
+    # First entry in timestamp_log is the start of the step 1. The rest of the
+    # entries are the end of each step recorded.
+    time_log = self.timestamp_log
+    seconds = time_log[-1].timestamp - time_log[warmup].timestamp
+    steps = time_log[-1].batch_index - time_log[warmup].batch_index
+    return self.batch_size * steps / seconds

-    if not output_dir:
-      output_dir = '/tmp'
-    self.output_dir = output_dir
-    self.timer_callback = None
+  def get_startup_time(self, start_time_sec):
+    return self.timestamp_log[0].timestamp - start_time_sec

-  def _get_model_dir(self, folder_name):
-    """Returns directory to store info, e.g. saved model and event log."""
-    return os.path.join(self.output_dir, folder_name)

-  def _setup(self):
-    """Sets up and resets flags before each test."""
-    self.timer_callback = benchmark_utils.BenchmarkTimerCallback()
+class DetectionBenchmarkBase(perfzero_benchmark.PerfZeroBenchmark):
+  """Base class to hold methods common to test classes."""

-    if DetectionBenchmarkBase.local_flags is None:
-      # Loads flags to get defaults to then override. List cannot be empty.
-      flags.FLAGS(['foo'])
-      saved_flag_values = flagsaver.save_flag_values()
-      DetectionBenchmarkBase.local_flags = saved_flag_values
-    else:
-      flagsaver.restore_flag_values(DetectionBenchmarkBase.local_flags)
+  def __init__(self, **kwargs):
+    super(DetectionBenchmarkBase, self).__init__(**kwargs)
+    self.timer_callback = None

-  def _report_benchmark(self,
-                        stats,
-                        wall_time_sec,
-                        min_ap,
-                        max_ap,
-                        train_batch_size=None):
+  def _report_benchmark(self, stats, start_time_sec, wall_time_sec, min_ap,
+                        max_ap, warmup):
     """Report benchmark results by writing to local protobuf file.

     Args:
       stats: dict returned from Detection models with known entries.
-      wall_time_sec: the during of the benchmark execution in seconds
+      start_time_sec: the start of the benchmark execution in seconds
+      wall_time_sec: the duration of the benchmark execution in seconds
       min_ap: Minimum detection AP constraint to verify correctness of the
         model.
       max_ap: Maximum detection AP accuracy constraint to verify correctness of
         the model.
-      train_batch_size: Train batch size. It is needed for computing
-        exp_per_second.
+      warmup: Number of time log entries to ignore when computing examples/sec.
     """
     metrics = [{
         'name': 'total_loss',
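For context on the arithmetic in TimerCallback above: keras_utils.TimeHistory keeps a timestamp_log whose entries carry a batch_index and a timestamp, and the two helpers simply take differences over that log. Below is a minimal standalone sketch of the same computation, using a stand-in namedtuple and made-up numbers; nothing in it is part of the change itself.

import collections

# Stand-in for the entries TimeHistory records; all values are hypothetical.
BatchTimestamp = collections.namedtuple('BatchTimestamp', ['batch_index', 'timestamp'])

batch_size = 64
start_time_sec = 100.0  # hypothetical time.time() taken just before training
timestamp_log = [
    BatchTimestamp(batch_index=1, timestamp=112.0),     # start of step 1
    BatchTimestamp(batch_index=100, timestamp=162.0),   # end of step 100
    BatchTimestamp(batch_index=200, timestamp=212.0),   # end of step 200
]

warmup = 1  # skip the first logged interval, as get_examples_per_sec does
seconds = timestamp_log[-1].timestamp - timestamp_log[warmup].timestamp    # 50.0
steps = timestamp_log[-1].batch_index - timestamp_log[warmup].batch_index  # 100
examples_per_sec = batch_size * steps / seconds                            # 128.0
startup_time = timestamp_log[0].timestamp - start_time_sec                 # 12.0
print(examples_per_sec, startup_time)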
@@ -99,7 +87,11 @@ class DetectionBenchmarkBase(tf.test.Benchmark):
     if self.timer_callback:
       metrics.append({
           'name': 'exp_per_second',
-          'value': self.timer_callback.get_examples_per_sec(train_batch_size)
+          'value': self.timer_callback.get_examples_per_sec(warmup)
+      })
+      metrics.append({
+          'name': 'startup_time',
+          'value': self.timer_callback.get_startup_time(start_time_sec)
       })
     else:
       metrics.append({
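The quantities computed above end up as plain name/value dicts handed to the inherited benchmark reporting call. With the illustrative numbers from the earlier sketch, the list would look roughly like this (values are hypothetical, only the field names come from the diff):

# Illustrative only; keys match the dicts built in _report_benchmark.
metrics = [
    {'name': 'total_loss', 'value': 0.97},        # hypothetical final loss from stats
    {'name': 'exp_per_second', 'value': 128.0},   # TimerCallback.get_examples_per_sec(warmup)
    {'name': 'startup_time', 'value': 12.0},      # TimerCallback.get_startup_time(start_time_sec)
]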
@@ -125,17 +117,17 @@ class DetectionBenchmarkBase(tf.test.Benchmark):
 class RetinanetBenchmarkBase(DetectionBenchmarkBase):
   """Base class to hold methods common to test classes in the module."""

-  def __init__(self, output_dir=None, **kwargs):
+  def __init__(self, **kwargs):
     self.train_data_path = COCO_TRAIN_DATA
     self.eval_data_path = COCO_EVAL_DATA
     self.eval_json_path = COCO_EVAL_JSON
     self.resnet_checkpoint_path = RESNET_CHECKPOINT_PATH
-
-    super(RetinanetBenchmarkBase, self).__init__(output_dir=output_dir)
+    super(RetinanetBenchmarkBase, self).__init__(**kwargs)

   def _run_detection_main(self):
     """Starts detection job."""
     if self.timer_callback:
+      FLAGS.log_steps = 0  # prevent detection.run from adding the same callback
       return detection.run(callbacks=[self.timer_callback])
     else:
       return detection.run()
@@ -149,37 +141,41 @@ class RetinanetAccuracy(RetinanetBenchmarkBase):
   `benchmark_(number of gpus)_gpu_(dataset type)` format.
   """

-  def __init__(self, output_dir=TMP_DIR, **kwargs):
-    super(RetinanetAccuracy, self).__init__(output_dir=output_dir)
-
   @benchmark_wrappers.enable_runtime_flags
-  def _run_and_report_benchmark(self, min_ap=0.325, max_ap=0.35):
+  def _run_and_report_benchmark(self,
+                                params,
+                                min_ap=0.325,
+                                max_ap=0.35,
+                                do_eval=True,
+                                warmup=1):
     """Starts RetinaNet accuracy benchmark test."""
+    FLAGS.params_override = json.dumps(params)
+    # Need timer callback to measure performance
+    self.timer_callback = TimerCallback(
+        batch_size=params['train']['batch_size'],
+        log_steps=FLAGS.log_steps,
+    )
     start_time_sec = time.time()
     FLAGS.mode = 'train'
     summary, _ = self._run_detection_main()
     wall_time_sec = time.time() - start_time_sec

-    FLAGS.mode = 'eval'
-    eval_metrics = self._run_detection_main()
-    summary.update(eval_metrics)
+    if do_eval:
+      FLAGS.mode = 'eval'
+      eval_metrics = self._run_detection_main()
+      summary.update(eval_metrics)

-    summary['train_batch_size'] = self.params_override['train']['batch_size']
-    summary['total_steps'] = self.params_override['train']['total_steps']
-    super(RetinanetAccuracy, self)._report_benchmark(
-        stats=summary,
-        wall_time_sec=wall_time_sec,
-        min_ap=min_ap,
-        max_ap=max_ap,
-        train_batch_size=self.params_override['train']['batch_size'])
+    summary['total_steps'] = params['train']['total_steps']
+    self._report_benchmark(summary, start_time_sec, wall_time_sec, min_ap,
+                           max_ap, warmup)

   def _setup(self):
     super(RetinanetAccuracy, self)._setup()
-    FLAGS.strategy_type = 'mirrored'
     FLAGS.model = 'retinanet'
-    self.params_override = {
+
+  def _params(self):
+    return {
         'train': {
             'batch_size': 64,
             'iterations_per_loop': 100,
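The new _run_and_report_benchmark takes the dict returned by _params and serializes it into the params_override flag with json.dumps; this replaces the old pattern of deep-copying self.params_override in each benchmark method. A small hedged sketch of that serialization, using a trimmed-down dict (the real one carries more keys, such as the checkpoint block shown in the next hunk):

import json

# Trimmed-down stand-in for the dict returned by _params(); only values
# visible in this diff are used, the rest is omitted.
params = {
    'train': {
        'batch_size': 64,
        'iterations_per_loop': 100,
    },
    'eval': {
        'batch_size': 8,
    },
}

# What FLAGS.params_override receives in _run_and_report_benchmark.
print(json.dumps(params))
# {"train": {"batch_size": 64, "iterations_per_loop": 100}, "eval": {"batch_size": 8}}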
@@ -189,6 +185,8 @@ class RetinanetAccuracy(RetinanetBenchmarkBase):
                 'path': self.resnet_checkpoint_path,
                 'prefix': 'resnet50/'
             },
+            # Speed up ResNet training when loading from the checkpoint.
+            'frozen_variable_prefix': base_config.RESNET_FROZEN_VAR_PREFIX,
         },
         'eval': {
             'batch_size': 8,
@@ -202,13 +200,11 @@ class RetinanetAccuracy(RetinanetBenchmarkBase):
   def benchmark_8_gpu_coco(self):
     """Run RetinaNet model accuracy test with 8 GPUs."""
     self._setup()
-    params = copy.deepcopy(self.params_override)
-    FLAGS.params_override = json.dumps(params)
+    params = self._params()
+    FLAGS.num_gpus = 8
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_coco')
-    # Sets timer_callback to None as we do not use it now.
-    self.timer_callback = None
-    self._run_and_report_benchmark()
+    FLAGS.strategy_type = 'mirrored'
+    self._run_and_report_benchmark(params)


 class RetinanetBenchmarkReal(RetinanetAccuracy):
@@ -219,15 +215,16 @@ class RetinanetBenchmarkReal(RetinanetAccuracy):
   `benchmark_(number of gpus)_gpu` format.
   """

-  def __init__(self, output_dir=TMP_DIR, **kwargs):
-    super(RetinanetBenchmarkReal, self).__init__(output_dir=output_dir)
+  def _setup(self):
+    super(RetinanetBenchmarkReal, self)._setup()
+    # Use negative value to avoid saving checkpoints.
+    FLAGS.save_checkpoint_freq = -1

   @flagsaver.flagsaver
   def benchmark_8_gpu_coco(self):
     """Run RetinaNet model accuracy test with 8 GPUs."""
-    self.num_gpus = 8
     self._setup()
-    params = copy.deepcopy(self.params_override)
+    params = self._params()
     params['train']['total_steps'] = 1875  # One epoch.
     # The iterations_per_loop must be one, otherwise the number of examples per
     # second would be wrong. Currently only support calling callback per batch
@@ -237,58 +234,52 @@ class RetinanetBenchmarkReal(RetinanetAccuracy):
     # Related bug: b/135933080
     params['train']['iterations_per_loop'] = 1
     params['eval']['eval_samples'] = 8
-    FLAGS.num_gpus = self.num_gpus
-    FLAGS.params_override = json.dumps(params)
+    FLAGS.num_gpus = 8
     FLAGS.model_dir = self._get_model_dir('real_benchmark_8_gpu_coco')
-    # Use negative value to avoid saving checkpoints.
-    FLAGS.save_checkpoint_freq = -1
-    if self.timer_callback is None:
-      logging.error('Cannot measure performance without timer callback')
-    else:
-      self._run_and_report_benchmark()
+    FLAGS.strategy_type = 'mirrored'
+    self._run_and_report_benchmark(params)

   @flagsaver.flagsaver
   def benchmark_1_gpu_coco(self):
     """Run RetinaNet model accuracy test with 1 GPU."""
-    self.num_gpus = 1
     self._setup()
-    params = copy.deepcopy(self.params_override)
+    params = self._params()
     params['train']['batch_size'] = 8
     params['train']['total_steps'] = 200
     params['train']['iterations_per_loop'] = 1
     params['eval']['eval_samples'] = 8
-    FLAGS.num_gpus = self.num_gpus
-    FLAGS.params_override = json.dumps(params)
+    FLAGS.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir('real_benchmark_1_gpu_coco')
     FLAGS.strategy_type = 'one_device'
-    # Use negative value to avoid saving checkpoints.
-    FLAGS.save_checkpoint_freq = -1
-    if self.timer_callback is None:
-      logging.error('Cannot measure performance without timer callback')
-    else:
-      self._run_and_report_benchmark()
+    self._run_and_report_benchmark(params)

   @flagsaver.flagsaver
   def benchmark_xla_1_gpu_coco(self):
     """Run RetinaNet model accuracy test with 1 GPU and XLA enabled."""
-    self.num_gpus = 1
     self._setup()
-    params = copy.deepcopy(self.params_override)
+    params = self._params()
     params['train']['batch_size'] = 8
     params['train']['total_steps'] = 200
     params['train']['iterations_per_loop'] = 1
     params['eval']['eval_samples'] = 8
-    FLAGS.num_gpus = self.num_gpus
-    FLAGS.params_override = json.dumps(params)
-    FLAGS.model_dir = self._get_model_dir('real_benchmark_1_gpu_coco')
+    FLAGS.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir('real_benchmark_xla_1_gpu_coco')
     FLAGS.strategy_type = 'one_device'
     FLAGS.enable_xla = True
-    # Use negative value to avoid saving checkpoints.
-    FLAGS.save_checkpoint_freq = -1
-    if self.timer_callback is None:
-      logging.error('Cannot measure performance without timer callback')
-    else:
-      self._run_and_report_benchmark()
+    self._run_and_report_benchmark(params)
+
+  @flagsaver.flagsaver
+  def benchmark_2x2_tpu_coco(self):
+    """Run RetinaNet model accuracy test with 4 TPUs."""
+    self._setup()
+    params = self._params()
+    params['train']['batch_size'] = 64
+    params['train']['total_steps'] = 1875  # One epoch.
+    params['train']['iterations_per_loop'] = 500
+    FLAGS.model_dir = self._get_model_dir('real_benchmark_2x2_tpu_coco')
+    FLAGS.strategy_type = 'tpu'
+    self._run_and_report_benchmark(params, do_eval=False, warmup=0)


 if __name__ == '__main__':
   tf.test.main()
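These benchmark classes are normally driven by the PerfZero harness, but each benchmark_* method can also be exercised directly once the class is constructed. A hedged sketch of such an invocation, assuming the file lives at official/benchmark/retinanet_benchmark.py and that PerfZeroBenchmark accepts an output_dir keyword (both are assumptions not stated in this diff):

# Hypothetical direct invocation; PerfZero normally drives these methods.
from official.benchmark import retinanet_benchmark  # assumed module path

bench = retinanet_benchmark.RetinanetBenchmarkReal(output_dir='/tmp/retinanet')
# Each benchmark method calls self._setup() first, which resets flag values
# before the overrides specific to that benchmark are applied.
bench.benchmark_1_gpu_coco()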