Commit 901c4cc4 authored by Vinh Nguyen

Merge remote-tracking branch 'upstream/master' into amp_resnet50

parents ef30de93 824ff2d6
@@ -47,13 +47,18 @@ To make Official Models easier to use, we are planning to create a pip installab
* [bert](bert): A powerful pre-trained language representation model: BERT, which
stands for Bidirectional Encoder Representations from Transformers.
* [boosted_trees](boosted_trees): A Gradient Boosted Trees model to classify the Higgs boson process from the HIGGS Data Set.
* [mnist](mnist): A basic model to classify digits from the MNIST dataset.
* [resnet](resnet): A deep residual network that can be used to classify both the CIFAR-10 dataset and the 1000-class ImageNet dataset.
* [transformer](transformer): A transformer model to translate the WMT English-to-German dataset.
* [wide_deep](wide_deep): A model that combines a wide model and deep network to classify census income data.
* More models to come!
Models that will not be updated to TensorFlow 2.x stay inside the r1 directory:
* [boosted_trees](r1/boosted_trees): A Gradient Boosted Trees model to classify the
Higgs boson process from the HIGGS Data Set.
If you would like to make any fixes or improvements to the models, please [submit a pull request](https://github.com/tensorflow/models/compare).
## New Models
@@ -22,8 +22,8 @@ import time
from absl import flags
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.resnet.keras import keras_benchmark
from official.resnet.keras import keras_cifar_main
from official.benchmark import keras_benchmark
from official.vision.image_classification import resnet_cifar_main
MIN_TOP_1_ACCURACY = 0.929
MAX_TOP_1_ACCURACY = 0.938
@@ -47,7 +47,7 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
"""
self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)
flag_methods = [keras_cifar_main.define_cifar_flags]
flag_methods = [resnet_cifar_main.define_cifar_flags]
super(Resnet56KerasAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
@@ -199,7 +199,7 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = keras_cifar_main.run(FLAGS)
stats = resnet_cifar_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
super(Resnet56KerasAccuracy, self)._report_benchmark(
@@ -215,7 +215,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
"""Short performance tests for ResNet56 via Keras and CIFAR-10."""
def __init__(self, output_dir=None, default_flags=None):
flag_methods = [keras_cifar_main.define_cifar_flags]
flag_methods = [resnet_cifar_main.define_cifar_flags]
super(Resnet56KerasBenchmarkBase, self).__init__(
output_dir=output_dir,
@@ -224,7 +224,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = keras_cifar_main.run(FLAGS)
stats = resnet_cifar_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
super(Resnet56KerasBenchmarkBase, self)._report_benchmark(
@@ -248,6 +248,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.run_eagerly = False
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla')
@@ -270,6 +271,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = False
FLAGS.run_eagerly = False
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
FLAGS.batch_size = 128
@@ -340,6 +342,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
self._setup()
FLAGS.num_gpus = 2
FLAGS.enable_eager = True
FLAGS.run_eagerly = False
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
FLAGS.batch_size = 128 * 2 # 2 GPUs
@@ -350,6 +353,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
self._setup()
FLAGS.num_gpus = 2
FLAGS.enable_eager = False
FLAGS.run_eagerly = False
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
FLAGS.batch_size = 128 * 2 # 2 GPUs
@@ -21,8 +21,8 @@ import time
from absl import flags
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.resnet.keras import keras_benchmark
from official.resnet.keras import keras_imagenet_main
from official.benchmark import keras_benchmark
from official.vision.image_classification import resnet_imagenet_main
MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77
@@ -44,7 +44,7 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
named arguments before updating the constructor.
"""
flag_methods = [keras_imagenet_main.define_imagenet_keras_flags]
flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]
self.data_dir = os.path.join(root_data_dir, 'imagenet')
super(Resnet50KerasAccuracy, self).__init__(
@@ -129,32 +129,6 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.use_tensor_lr = True
self._run_and_report_benchmark()
def benchmark_8_gpu_mlperf_like_tweaked(self):
"""Test similar to the rules for MLPerf 0.5.
Listed below are reasons this comparison does not follow the MLPerf spec, but it is
still a decent directional measurement:
- Eval runs every 4 epochs and again at the end, i.e. ~2 extra times.
- The learning rate is not tuned to hit 75%, but we know the model is correct.
- We measure total time, whereas MLPerf 0.5 excluded some startup time.
- Eval is not on the full set; eval batch_size must be set so that
8*batch_size divides 50K evenly. 250 is a good number.
"""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 61
FLAGS.epochs_between_evals = 4
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mlperf_like_tweaked')
FLAGS.dtype = 'fp16'
FLAGS.enable_eager = True
FLAGS.enable_xla = True
FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private'
self._run_and_report_benchmark(top_1_min=0.736)
def benchmark_8_gpu_mlperf_like(self):
"""Test similar to the rules for MLPerf 0.5.
@@ -201,7 +175,7 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
top_1_min=MIN_TOP_1_ACCURACY,
top_1_max=MAX_TOP_1_ACCURACY):
start_time_sec = time.time()
stats = keras_imagenet_main.run(flags.FLAGS)
stats = resnet_imagenet_main.run(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
super(Resnet50KerasAccuracy, self)._report_benchmark(
@@ -220,7 +194,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
"""Resnet50 benchmarks."""
def __init__(self, output_dir=None, default_flags=None):
flag_methods = [keras_imagenet_main.define_imagenet_keras_flags]
flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]
super(Resnet50KerasBenchmarkBase, self).__init__(
output_dir=output_dir,
@@ -229,7 +203,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = keras_imagenet_main.run(FLAGS)
stats = resnet_imagenet_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
# Number of logged step time entries that are excluded in performance
# report. We keep results from last 100 batches in this case.
@@ -294,48 +268,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 64
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
"""Forced v1 execution in tf.compile path and force eager."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly_tweaked(self):
"""Forced v1 execution in tf.compile path and force eager."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.explicit_gpu_placement = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly_tweaked')
FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat but forced v1 execution tf.compile path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.batch_size = 128
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
"""Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
self._setup()
@@ -478,20 +410,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.tf_gpu_thread_mode = 'gpu_private'
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_fp16_slack(self):
"""Test Keras model tf.data's experimental_slack functionality."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_slack')
FLAGS.dtype = 'fp16'
FLAGS.batch_size = 256
FLAGS.tf_data_experimental_slack = True
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_fp16_dynamic(self):
"""Test Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
self._setup()
@@ -570,21 +488,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.tf_gpu_thread_mode = 'gpu_private'
self._run_and_report_benchmark()
def benchmark_graph_xla_1_gpu_fp16_slack(self):
"""Test model in legacy graph with tf.data's experimental_slack."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = False
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_xla_1_gpu_fp16_slack')
FLAGS.dtype = 'fp16'
FLAGS.batch_size = 256
FLAGS.tf_data_experimental_slack = True
self._run_and_report_benchmark()
def benchmark_8_gpu(self):
"""Test Keras model with 8 GPUs."""
self._setup()
@@ -621,18 +524,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.datasets_num_private_threads = 14
self._run_and_report_benchmark()
def benchmark_8_gpu_slack(self):
"""Test Keras model with tf.data's experimental_slack and 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_slack')
FLAGS.batch_size = 128 * 8 # 8 GPUs
FLAGS.tf_data_experimental_slack = True
self._run_and_report_benchmark()
def benchmark_xla_8_gpu(self):
"""Test Keras model with XLA and 8 GPUs."""
self._setup()
@@ -715,24 +606,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.tf_gpu_thread_mode = 'gpu_private'
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16_optional_next(self):
"""Test Keras model with XLA, 8 GPUs and fp16.
This test also enables get_next_as_optional.
"""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.enable_eager = True
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir(
'benchmark_xla_8_gpu_fp16_optional_next')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.enable_get_next_as_optional = True
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16(self):
"""Test Keras model with XLA, 8 GPUs and fp16."""
self._setup()
@@ -782,44 +655,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.train_steps = 310
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16_tweaked_optional_next(self):
"""Test Keras model with manual config tuning, XLA, 8 GPUs, fp16.
This test also enables get_next_as_optional.
"""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.enable_eager = True
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir(
'benchmark_xla_8_gpu_fp16_tweaked_optional_next')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.datasets_num_private_threads = 48
FLAGS.enable_get_next_as_optional = True
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16_slack(self):
"""Test Keras model with XLA, 8 GPUs and fp16.
This test also enables tf.data's experimental_slack functionality.
"""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.enable_eager = True
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_slack')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.tf_data_experimental_slack = True
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self):
"""Test Keras model with config tuning, XLA, 8 GPUs and dynamic fp16."""
self._setup()
@@ -838,24 +673,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.datasets_num_private_threads = 48
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16_tensorboard_tweaked(self):
"""Test to track Tensorboard performance overhead."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.enable_eager = True
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir(
'benchmark_xla_8_gpu_fp16_tensorboard_tweaked')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.datasets_num_private_threads = 48
FLAGS.enable_tensorboard = True
self._run_and_report_benchmark()
def benchmark_graph_8_gpu(self):
"""Test Keras model in legacy graph mode with 8 GPUs."""
self._setup()
@@ -954,41 +771,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.train_steps = 310
self._run_and_report_benchmark()
def benchmark_graph_xla_8_gpu_fp16_tweaked_optional_next(self):
"""Test in legacy graph mode with manual config tuning, XLA, 8 GPUs, fp16.
This test also enables get_next_as_optional.
"""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.enable_eager = False
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_xla_8_gpu_fp16_tweaked_optional_next')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.enable_get_next_as_optional = True
self._run_and_report_benchmark()
def benchmark_graph_xla_8_gpu_fp16_slack(self):
"""Test legacy graph mode with tf.data's experimental_slack."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.enable_eager = False
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_xla_8_gpu_fp16_slack')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.tf_data_experimental_slack = True
self._run_and_report_benchmark()
def benchmark_graph_8_gpu_fp16_dynamic_tweaked(self):
"""Test graph Keras with config tuning, 8 GPUs and dynamic fp16."""
self._setup()
@@ -1063,7 +845,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
"""Trivial model with real data benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
flag_methods = [keras_imagenet_main.define_imagenet_keras_flags]
flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]
def_flags = {}
def_flags['use_trivial_model'] = True
@@ -1083,7 +865,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = keras_imagenet_main.run(FLAGS)
stats = resnet_imagenet_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
super(TrivialKerasBenchmarkReal, self)._report_benchmark(
@@ -1180,5 +962,96 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
log_steps=FLAGS.log_steps)
class Resnet50MultiWorkerKerasBenchmark(Resnet50KerasBenchmarkBase):
"""Resnet50 distributed benchmark tests with multiple workers."""
def __init__(self, output_dir=None, default_flags=None):
super(Resnet50MultiWorkerKerasBenchmark, self).__init__(
output_dir=output_dir, default_flags=default_flags)
def _benchmark_common(self, eager, num_workers, all_reduce_alg):
"""Common to all benchmarks in this class."""
self._setup()
num_gpus = 8
FLAGS.num_gpus = num_gpus
FLAGS.dtype = 'fp16'
FLAGS.enable_eager = eager
FLAGS.enable_xla = False
FLAGS.distribution_strategy = 'multi_worker_mirrored'
FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_8_gpu_{}_worker_fp16_{}_tweaked'.format(
num_workers, all_reduce_alg))
FLAGS.batch_size = 256 * num_gpus * num_workers
FLAGS.all_reduce_alg = all_reduce_alg
self._run_and_report_benchmark()
def benchmark_graph_8_gpu_1_worker_fp16_ring_tweaked(self):
"""Legacy graph, 8 GPUs per worker, 1 worker, fp16, ring all-reduce."""
self._benchmark_common(eager=False, num_workers=1, all_reduce_alg='ring')
def benchmark_graph_8_gpu_1_worker_fp16_nccl_tweaked(self):
"""Legacy graph, 8 GPUs per worker, 1 worker, fp16, nccl all-reduce."""
self._benchmark_common(eager=False, num_workers=1, all_reduce_alg='nccl')
def benchmark_graph_8_gpu_2_workers_fp16_ring_tweaked(self):
"""Legacy graph, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
self._benchmark_common(eager=False, num_workers=2, all_reduce_alg='ring')
def benchmark_graph_8_gpu_2_workers_fp16_nccl_tweaked(self):
"""Legacy graph, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
self._benchmark_common(eager=False, num_workers=2, all_reduce_alg='nccl')
def benchmark_graph_8_gpu_8_workers_fp16_ring_tweaked(self):
"""Legacy graph, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
self._benchmark_common(eager=False, num_workers=8, all_reduce_alg='ring')
def benchmark_graph_8_gpu_8_workers_fp16_nccl_tweaked(self):
"""Legacy graph, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
self._benchmark_common(eager=False, num_workers=8, all_reduce_alg='nccl')
def benchmark_eager_8_gpu_1_worker_fp16_ring_tweaked(self):
"""Eager, 8 GPUs per worker, 1 worker, fp16, ring all-reduce."""
self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='ring')
def benchmark_eager_8_gpu_1_worker_fp16_nccl_tweaked(self):
"""Eager, 8 GPUs per worker, 1 worker, fp16, nccl all-reduce."""
self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='nccl')
def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self):
"""Eager, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='ring')
def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self):
"""Eager, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='nccl')
def benchmark_eager_8_gpu_8_workers_fp16_ring_tweaked(self):
"""Eager, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='ring')
def benchmark_eager_8_gpu_8_workers_fp16_nccl_tweaked(self):
"""Eager, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='nccl')
class Resnet50MultiWorkerKerasBenchmarkSynth(Resnet50MultiWorkerKerasBenchmark):
"""Resnet50 multi-worker synthetic benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['skip_eval'] = True
def_flags['report_accuracy_metrics'] = False
def_flags['use_synthetic_data'] = True
def_flags['train_steps'] = 110
def_flags['log_steps'] = 10
super(Resnet50MultiWorkerKerasBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags)
if __name__ == '__main__':
tf.test.main()
# BERT (Bidirectional Encoder Representations from Transformers)
Note: Please do not create pull requests. This model is still under development
and testing.
The academic paper that describes BERT in detail and provides full results on a
number of tasks can be found here: https://arxiv.org/abs/1810.04805.
@@ -30,6 +27,31 @@ Our currently released checkpoints are exactly the same as those in the TF 1.x official BERT
repository; thus, `BertConfig` has `backward_compatible=True`. We
are going to release new pre-trained checkpoints soon.
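As a hedged illustration of the compatibility note above (the config filename is an assumption, and `from_json_file` mirrors the TF 1.x BERT API):

```python
from official.bert import modeling

# Hypothetical sketch: load the config shipped alongside a converted
# checkpoint. The JSON path is a placeholder for illustration.
bert_config = modeling.BertConfig.from_json_file(
    "uncased_L-12_H-768_A-12/bert_config.json")
# Converted TF 1.x checkpoints keep this flag set for shape compatibility.
assert bert_config.backward_compatible
```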
### Access to Pretrained Checkpoints
We provide checkpoints converted from [google-research/bert](https://github.com/google-research/bert)
in order to stay consistent with the BERT paper.
* **[`BERT-Large, Uncased (Whole Word Masking)`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/tf_20/wwm_uncased_L-24_H-1024_A-16.tar.gz)**:
24-layer, 1024-hidden, 16-heads, 340M parameters
* **[`BERT-Large, Cased (Whole Word Masking)`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/tf_20/wwm_cased_L-24_H-1024_A-16.tar.gz)**:
24-layer, 1024-hidden, 16-heads, 340M parameters
* **[`BERT-Base, Uncased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/tf_20/uncased_L-12_H-768_A-12.tar.gz)**:
12-layer, 768-hidden, 12-heads, 110M parameters
* **[`BERT-Large, Uncased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16.tar.gz)**:
24-layer, 1024-hidden, 16-heads, 340M parameters
* **[`BERT-Base, Cased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/tf_20/cased_L-12_H-768_A-12.tar.gz)**:
12-layer, 768-hidden, 12-heads, 110M parameters
* **[`BERT-Large, Cased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/tf_20/cased_L-24_H-1024_A-16.tar.gz)**:
24-layer, 1024-hidden, 16-heads, 340M parameters
We recommend hosting checkpoints on Google Cloud Storage buckets when you use
Cloud GPUs/TPUs. For example, in the following tutorial, we use:
```shell
export BERT_BASE_DIR=gs://cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16
```
### Restoring from Checkpoints
`tf.train.Checkpoint` is used to manage model checkpoints in TF 2.0. To restore
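A minimal TF 2.0 restore sketch (the model and the model directory below are placeholders, not this repo's actual code):

```python
import tensorflow as tf

# Minimal sketch: restore the latest checkpoint from a model directory.
model = tf.keras.Sequential([tf.keras.layers.Dense(2)])
checkpoint = tf.train.Checkpoint(model=model)
latest_ckpt = tf.train.latest_checkpoint('/tmp/bert_model_dir')
if latest_ckpt:
  # expect_partial() silences warnings about checkpointed objects that are
  # not present in this object graph (e.g. optimizer slots).
  checkpoint.restore(latest_ckpt).expect_partial()
```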
@@ -70,9 +92,9 @@ Second, you need to install the TF 2.0 nightly build (`tf-nightly`) on your VM:
pip install tf-nightly-2.0-preview
```
Warning: More detailed TPU-specific set-up instructions and a tutorial for TF 2.0
are coming. Note that this repo is not officially supported by the Google Cloud TPU
team yet.
Warning: More detailed TPU-specific set-up instructions and a tutorial should come
along with the official TF 2.x release for TPU. Note that this repo is not officially
supported by the Google Cloud TPU team yet.
## Process Datasets
@@ -152,7 +152,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad')
FLAGS.train_batch_size = 4
FLAGS.train_batch_size = 3
self._run_and_report_benchmark()
@@ -174,7 +174,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat_squad')
FLAGS.train_batch_size = 4
FLAGS.train_batch_size = 3
self._run_and_report_benchmark(use_ds=False)
@@ -185,7 +185,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_eager_no_dist_strat_squad')
FLAGS.train_batch_size = 4
FLAGS.train_batch_size = 3
self._run_and_report_benchmark(use_ds=False, run_eagerly=True)
@@ -195,7 +195,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._setup()
self.num_gpus = 2
FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad')
FLAGS.train_batch_size = 8
FLAGS.train_batch_size = 6
self._run_and_report_benchmark()
@@ -205,7 +205,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._setup()
self.num_gpus = 4
FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad')
FLAGS.train_batch_size = 16
FLAGS.train_batch_size = 12
self._run_and_report_benchmark()
@@ -215,7 +215,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._setup()
self.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
FLAGS.train_batch_size = 32
FLAGS.train_batch_size = 24
self._run_and_report_benchmark()
@@ -231,6 +231,19 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._run_and_report_benchmark()
def benchmark_1_gpu_xla_fp16(self):
"""Tests BERT SQuAD model performance with 1 GPU with XLA and FP16."""
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla_squad_fp16')
FLAGS.train_batch_size = 4
FLAGS.enable_xla = True
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 'dynamic'
self._run_and_report_benchmark()
def benchmark_2_gpu_fp16(self):
"""Tests BERT SQuAD model performance with 2 GPUs and FP16."""
@@ -324,7 +337,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
self._setup()
self.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
FLAGS.train_batch_size = 32
FLAGS.train_batch_size = 24
self._run_and_report_benchmark()
@@ -25,18 +25,12 @@ from absl import logging
import tensorflow as tf
from tensorflow.python.util import object_identity
from official.utils.misc import distribution_utils
from official.utils.misc import tpu_lib
_SUMMARY_TXT = 'training_summary.txt'
_MIN_SUMMARY_STEPS = 10
def get_primary_cpu_task(use_remote_tpu=False):
"""Returns primary CPU task to which input pipeline Ops are put."""
# Remote Eager Borg job configures the TPU worker with job name 'worker'.
return '/job:worker' if use_remote_tpu else ''
def _save_checkpoint(checkpoint, model_dir, checkpoint_prefix):
"""Saves model to with provided checkpoint prefix."""
@@ -195,7 +189,7 @@ def run_customized_training_loop(
# To reduce unnecessary send/receive of input pipeline ops, we place the
# input pipeline ops on the worker task.
with tf.device(get_primary_cpu_task(use_remote_tpu)):
with tf.device(tpu_lib.get_primary_cpu_task(use_remote_tpu)):
train_iterator = _get_input_iterator(train_input_fn, strategy)
with distribution_utils.get_strategy_scope(strategy):
@@ -276,6 +276,7 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
max_position_embeddings=512,
dropout_prob=0.0,
initializer_range=0.02,
initializer=None,
**kwargs):
super(EmbeddingPostprocessor, self).__init__(**kwargs)
self.use_type_embeddings = use_type_embeddings
@@ -285,6 +286,11 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
self.dropout_prob = dropout_prob
self.initializer_range = initializer_range
if not initializer:
self.initializer = get_initializer(self.initializer_range)
else:
self.initializer = initializer
if self.use_type_embeddings and not self.token_type_vocab_size:
raise ValueError("If `use_type_embeddings` is True, then "
"`token_type_vocab_size` must be specified.")
@@ -723,6 +729,15 @@ class TransformerBlock(tf.keras.layers.Layer):
name="output_layer_norm", axis=-1, epsilon=1e-12)
super(TransformerBlock, self).build(unused_input_shapes)
def common_layers(self):
"""Explicitly gets all layer objects inside a Transformer encoder block."""
return [
self.attention_layer, self.attention_output_dense,
self.attention_dropout, self.attention_layer_norm,
self.intermediate_dense, self.output_dense, self.output_dropout,
self.output_layer_norm
]
def __call__(self, input_tensor, attention_mask=None):
inputs = pack_inputs([input_tensor, attention_mask])
return super(TransformerBlock, self).__call__(inputs)
@@ -35,8 +35,8 @@ from official.bert import model_saving_utils
from official.bert import model_training_utils
from official.bert import modeling
from official.bert import optimization
from official.bert import tpu_lib
from official.utils.misc import keras_utils
from official.utils.misc import tpu_lib
flags.DEFINE_enum(
'mode', 'train_and_eval', ['train_and_eval', 'export_only'],
@@ -210,7 +210,7 @@ def run_bert(strategy, input_meta_data):
run_eagerly=FLAGS.run_eagerly)
if FLAGS.model_export_path:
with tf.device(model_training_utils.get_primary_cpu_task(use_remote_tpu)):
with tf.device(tpu_lib.get_primary_cpu_task(use_remote_tpu)):
model_saving_utils.export_bert_model(
FLAGS.model_export_path, model=trained_model)
return trained_model
@@ -33,7 +33,7 @@ from official.bert import model_saving_utils
from official.bert import model_training_utils
from official.bert import modeling
from official.bert import optimization
from official.bert import tpu_lib
from official.utils.misc import tpu_lib
flags.DEFINE_string('input_files', None,
'File path to retrieve training data for pre-training.')
@@ -36,8 +36,8 @@ from official.bert import modeling
from official.bert import optimization
from official.bert import squad_lib
from official.bert import tokenization
from official.bert import tpu_lib
from official.utils.misc import keras_utils
from official.utils.misc import tpu_lib
flags.DEFINE_bool('do_train', False, 'Whether to run training.')
flags.DEFINE_bool('do_predict', False, 'Whether to run eval on the dev set.')
# Keras Application Models Benchmark
## Overview
This provides a single scaffold to benchmark the Keras built-in application [models](https://keras.io/applications/). All of the models are for image classification, and they include the following (see the sketch after this list):
- Xception
- VGG16
- VGG19
- ResNet50
- InceptionV3
- InceptionResNetV2
- MobileNet
- DenseNet
- NASNet
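Each name resolves to a `tf.keras.applications` class through the `MODELS` dictionary in [benchmark_main.py](benchmark_main.py); a minimal sketch:

```python
import tensorflow as tf

# Minimal sketch mirroring benchmark_main.py: resolve a model class by name
# and instantiate it with random (untrained) weights.
MODELS = {"resnet50": tf.keras.applications.ResNet50}
model = MODELS["resnet50"](weights=None)
```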
## Dataset
A synthetic dataset is used for the benchmark by default; with `--use_synthetic_data` disabled, CIFAR-10 is used instead.
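For reference, a minimal sketch of building the synthetic data with the `generate_synthetic_input_dataset` helper from [dataset.py](dataset.py):

```python
from official.keras_application_models import dataset

# Synthetic ImageNet-shaped batches for ResNet50: images of shape
# (batch_size, 224, 224, 3) with one-hot labels over 1000 classes.
train_ds = dataset.generate_synthetic_input_dataset("resnet50", batch_size=32)
```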
## Callbacks
Two custom callbacks are provided for model benchmarking: ExamplesPerSecondCallback and LoggingMetricCallback. For each callback, `epoch_based` and `batch_based` options are available to set the benchmark level. Check [model_callbacks.py](model_callbacks.py) for more details.
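For example, a minimal sketch of building both callbacks by name via `get_model_callbacks`:

```python
from official.keras_application_models import model_callbacks

# Names are matched case-insensitively against the CALLBACKS dictionary;
# extra keyword arguments are forwarded to each callback factory.
callbacks = model_callbacks.get_model_callbacks(
    ["ExamplesPerSecondCallback", "LoggingMetricCallback"],
    batch_size=32, every_n_steps=1)
```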
## Running Code
To benchmark a model, use `--model` to specify the model name. To perform the benchmark with eager execution, issue the following command:
```
python benchmark_main.py --model resnet50 --eager
```
Note that if eager execution is enabled, only one GPU is utilized even if multiple GPUs are provided and `multi_gpu_model` is used.
To use distribution strategy in the benchmark, run the following:
```
python benchmark_main.py --model resnet50 --dist_strat
```
Currently, only one of the `--eager` and `--dist_strat` arguments can be set, as DistributionStrategy is not yet supported in eager execution.
Arguments:
* `--model`: The model to benchmark. Valid names are the keys of `MODELS` in [benchmark_main.py](benchmark_main.py).
* `--callbacks`: Specifies a list of callbacks.
Use the `--help` or `-h` flag to get a full list of possible arguments.
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark on the keras built-in application models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# pylint: disable=g-bad-import-order
import numpy as np
from absl import app as absl_app
from absl import flags
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.keras_application_models import dataset
from official.keras_application_models import model_callbacks
from official.utils.flags import core as flags_core
from official.utils.logs import logger
from official.utils.misc import distribution_utils
# Define a dictionary that maps model names to their model classes inside Keras
MODELS = {
"vgg16": tf.keras.applications.VGG16,
"vgg19": tf.keras.applications.VGG19,
"inceptionv3": tf.keras.applications.InceptionV3,
"xception": tf.keras.applications.Xception,
"resnet50": tf.keras.applications.ResNet50,
"inceptionresnetv2": tf.keras.applications.InceptionResNetV2,
"mobilenet": tf.keras.applications.MobileNet,
"densenet121": tf.keras.applications.DenseNet121,
"densenet169": tf.keras.applications.DenseNet169,
"densenet201": tf.keras.applications.DenseNet201,
"nasnetlarge": tf.keras.applications.NASNetLarge,
"nasnetmobile": tf.keras.applications.NASNetMobile,
}
def run_keras_model_benchmark(_):
"""Run the benchmark on keras model."""
# Ensure a valid model name was supplied via command line argument
if FLAGS.model not in MODELS.keys():
raise AssertionError("The --model command line argument should "
"be a key in the `MODELS` dictionary.")
# Check if eager execution is enabled
if FLAGS.eager:
tf.logging.info("Eager execution is enabled...")
tf.enable_eager_execution()
# Load the model
tf.logging.info("Benchmark on {} model...".format(FLAGS.model))
keras_model = MODELS[FLAGS.model]
# Get dataset
dataset_name = "ImageNet"
if FLAGS.use_synthetic_data:
tf.logging.info("Using synthetic dataset...")
dataset_name += "_Synthetic"
train_dataset = dataset.generate_synthetic_input_dataset(
FLAGS.model, FLAGS.batch_size)
val_dataset = dataset.generate_synthetic_input_dataset(
FLAGS.model, FLAGS.batch_size)
model = keras_model(weights=None)
else:
tf.logging.info("Using CIFAR-10 dataset...")
dataset_name = "CIFAR-10"
ds = dataset.Cifar10Dataset(FLAGS.batch_size)
train_dataset = ds.train_dataset
val_dataset = ds.test_dataset
model = keras_model(
weights=None, input_shape=ds.input_shape, classes=ds.num_classes)
num_gpus = flags_core.get_num_gpus(FLAGS)
distribution = None
# Use distribution strategy
if FLAGS.dist_strat:
distribution = distribution_utils.get_distribution_strategy(
distribution_strategy=FLAGS.distribution_strategy,
num_gpus=num_gpus)
elif num_gpus > 1:
# Run with multi_gpu_model
# If eager execution is enabled, only one GPU is utilized even if multiple
# GPUs are provided.
if FLAGS.eager:
tf.logging.warning(
"{} GPUs are provided, but only one GPU is utilized as "
"eager execution is enabled.".format(num_gpus))
model = tf.keras.utils.multi_gpu_model(model, gpus=num_gpus)
# The Adam optimizer and some other optimizers don't work well with
# distribution strategy (b/113076709).
# Use GradientDescentOptimizer here.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
model.compile(loss="categorical_crossentropy",
optimizer=optimizer,
metrics=["accuracy"],
distribute=distribution)
# Create benchmark logger for benchmark logging
run_params = {
"batch_size": FLAGS.batch_size,
"synthetic_data": FLAGS.use_synthetic_data,
"train_epochs": FLAGS.train_epochs,
"num_train_images": FLAGS.num_train_images,
"num_eval_images": FLAGS.num_eval_images,
}
benchmark_logger = logger.get_benchmark_logger()
benchmark_logger.log_run_info(
model_name=FLAGS.model,
dataset_name=dataset_name,
run_params=run_params,
test_id=FLAGS.benchmark_test_id)
# Create callbacks that log metric values about the training and evaluation
callbacks = model_callbacks.get_model_callbacks(
FLAGS.callbacks,
batch_size=FLAGS.batch_size,
metric_logger=benchmark_logger)
# Train and evaluate the model
history = model.fit(
train_dataset,
epochs=FLAGS.train_epochs,
callbacks=callbacks,
validation_data=val_dataset,
steps_per_epoch=int(np.ceil(FLAGS.num_train_images / FLAGS.batch_size)),
validation_steps=int(np.ceil(FLAGS.num_eval_images / FLAGS.batch_size))
)
tf.logging.info("Logging the evaluation results...")
for epoch in range(FLAGS.train_epochs):
eval_results = {
"accuracy": history.history["val_acc"][epoch],
"loss": history.history["val_loss"][epoch],
tf.GraphKeys.GLOBAL_STEP: (epoch + 1) * np.ceil(
FLAGS.num_eval_images/FLAGS.batch_size)
}
benchmark_logger.log_evaluation_result(eval_results)
# Clear the session explicitly to avoid session delete error
tf.keras.backend.clear_session()
def define_keras_benchmark_flags():
"""Add flags for keras built-in application models."""
flags_core.define_base(hooks=False)
flags_core.define_performance()
flags_core.define_image()
flags_core.define_benchmark()
flags.adopt_module_key_flags(flags_core)
flags_core.set_defaults(
data_format="channels_last",
use_synthetic_data=True,
batch_size=32,
train_epochs=2)
flags.DEFINE_enum(
name="model", default=None,
enum_values=MODELS.keys(), case_sensitive=False,
help=flags_core.help_wrap(
"Model to be benchmarked."))
flags.DEFINE_integer(
name="num_train_images", default=1000,
help=flags_core.help_wrap(
"The number of synthetic images for training. The default value is "
"1000."))
flags.DEFINE_integer(
name="num_eval_images", default=50,
help=flags_core.help_wrap(
"The number of synthetic images for evaluation. The default value is "
"50."))
flags.DEFINE_boolean(
name="eager", default=False, help=flags_core.help_wrap(
"To enable eager execution. Note that if eager execution is enabled, "
"only one GPU is utilized even if multiple GPUs are provided and "
"multi_gpu_model is used."))
flags.DEFINE_boolean(
name="dist_strat", default=False, help=flags_core.help_wrap(
"To enable distribution strategy for model training and evaluation. "
"Number of GPUs used for distribution strategy can be set by the "
"argument --num_gpus."))
flags.DEFINE_list(
name="callbacks",
default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
help=flags_core.help_wrap(
"A list of (case insensitive) strings to specify the names of "
"callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
"LoggingMetricCallback`"))
@flags.multi_flags_validator(
["eager", "dist_strat"],
message="Both --eager and --dist_strat were set. Only one can be "
"defined, as DistributionStrategy is not supported in Eager "
"execution currently.")
# pylint: disable=unused-variable
def _check_eager_dist_strat(flag_dict):
return not(flag_dict["eager"] and flag_dict["dist_strat"])
def main(_):
with logger.benchmark_context(FLAGS):
run_keras_model_benchmark(FLAGS)
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
define_keras_benchmark_flags()
FLAGS = flags.FLAGS
absl_app.run(main)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Prepare dataset for keras model benchmark."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from official.utils.misc import model_helpers # pylint: disable=g-bad-import-order
# Default values for dataset.
_NUM_CHANNELS = 3
_NUM_CLASSES = 1000
def _get_default_image_size(model):
"""Provide default image size for each model."""
image_size = (224, 224)
if model in ["inceptionv3", "xception", "inceptionresnetv2"]:
image_size = (299, 299)
elif model in ["nasnetlarge"]:
image_size = (331, 331)
return image_size
def generate_synthetic_input_dataset(model, batch_size):
"""Generate synthetic dataset."""
image_size = _get_default_image_size(model)
image_shape = (batch_size,) + image_size + (_NUM_CHANNELS,)
label_shape = (batch_size, _NUM_CLASSES)
dataset = model_helpers.generate_synthetic_data(
input_shape=tf.TensorShape(image_shape),
label_shape=tf.TensorShape(label_shape),
)
return dataset
class Cifar10Dataset(object):
"""CIFAR10 dataset, including train and test set.
Each sample consists of a 32x32 color image, and its label is one of 10 classes.
"""
def __init__(self, batch_size):
"""Initializes train/test datasets.
Args:
batch_size: int, the batch size.
"""
self.input_shape = (32, 32, 3)
self.num_classes = 10
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
y_train, y_test = y_train.astype(np.int64), y_test.astype(np.int64)
y_train = tf.keras.utils.to_categorical(y_train, self.num_classes)
y_test = tf.keras.utils.to_categorical(y_test, self.num_classes)
self.train_dataset = tf.data.Dataset.from_tensor_slices(
(x_train, y_train)).shuffle(2000).batch(batch_size).repeat()
self.test_dataset = tf.data.Dataset.from_tensor_slices(
(x_test, y_test)).shuffle(2000).batch(batch_size).repeat()
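A short usage sketch of this wrapper, assuming eager execution (the batch size is arbitrary):

```python
# Usage sketch: build the CIFAR-10 pipelines and pull one batch eagerly.
ds = Cifar10Dataset(batch_size=32)
images, labels = next(iter(ds.train_dataset))
# images: (32, 32, 32, 3) floats in [0, 1]; labels: (32, 10) one-hot.
```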
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Callbacks for Keras built-in application models.
Note that, in the callbacks, the global_step is initialized in the __init__ of
each callback rather than in on_train_begin, because on_train_begin gets called
inside the fit loop and is reset with each call to fit(). To keep the
global_step persistent across all training sessions, it is initialized in
__init__.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.utils.logs import logger
# Metrics to log after each batch and epoch
_PER_BATCH_METRICS = {
"loss": "train_loss",
"acc": "train_accuracy",
}
_PER_EPOCH_METRICS = {
"loss": "train_loss",
"acc": "train_accuracy",
"val_loss": "loss",
"val_acc": "accuracy"
}
class ExamplesPerSecondCallback(tf.keras.callbacks.Callback):
"""ExamplesPerSecond callback.
This callback records the average_examples_per_sec and
current_examples_per_sec during training.
"""
def __init__(self, batch_size, every_n_steps=1, metric_logger=None):
self._batch_size = batch_size
self._every_n_steps = every_n_steps
self._logger = metric_logger or logger.BaseBenchmarkLogger()
self._global_step = 0 # Initialize it in __init__
super(ExamplesPerSecondCallback, self).__init__()
def on_train_begin(self, logs=None):
self._train_start_time = time.time()
self._last_recorded_time = time.time()
def on_batch_end(self, batch, logs=None):
"""Log the examples_per_sec metric every_n_steps."""
self._global_step += 1
current_time = time.time()
if self._global_step % self._every_n_steps == 0:
average_examples_per_sec = self._batch_size * (
self._global_step / (current_time - self._train_start_time))
self._logger.log_metric(
"average_examples_per_sec", average_examples_per_sec,
global_step=self._global_step)
current_examples_per_sec = self._batch_size * (
self._every_n_steps / (current_time - self._last_recorded_time))
self._logger.log_metric(
"current_examples_per_sec", current_examples_per_sec,
global_step=self._global_step)
self._last_recorded_time = current_time # Update last_recorded_time
class LoggingMetricCallback(tf.keras.callbacks.Callback):
"""LoggingMetric callback.
Log the predefined _PER_BATCH_METRICS after each batch, and log the predefined
_PER_EPOCH_METRICS after each epoch.
"""
def __init__(self, metric_logger=None):
self._logger = metric_logger or logger.BaseBenchmarkLogger()
self._per_batch_metrics = _PER_BATCH_METRICS
self._per_epoch_metrics = _PER_EPOCH_METRICS
self._global_step = 0 # Initialize it in __init__
super(LoggingMetricCallback, self).__init__()
def on_batch_end(self, batch, logs=None):
"""Log metrics after each batch."""
self._global_step += 1
for metric in _PER_BATCH_METRICS:
self._logger.log_metric(
_PER_BATCH_METRICS[metric],
logs.get(metric),
global_step=self._global_step)
def on_epoch_end(self, epoch, logs=None):
"""Log metrics after each epoch."""
for metric in _PER_EPOCH_METRICS:
self._logger.log_metric(
_PER_EPOCH_METRICS[metric],
logs.get(metric),
global_step=self._global_step)
def get_model_callbacks(name_list, **kwargs):
"""Factory for getting a list of TensorFlow hooks for training by name.
Args:
name_list: a list of strings to name desired callback classes. Allowed:
ExamplesPerSecondCallback, LoggingMetricCallback, which are defined
as keys in CALLBACKS.
**kwargs: a dictionary of arguments to the callbacks.
Returns:
list of instantiated callbacks, ready to be used in a model.fit call.
Raises:
ValueError: if an unrecognized name is passed.
"""
if not name_list:
return []
callbacks = []
for name in name_list:
callback_name = CALLBACKS.get(name.strip().lower())
if callback_name is None:
raise ValueError(
"Unrecognized training callback requested: {}".format(name))
else:
callbacks.append(callback_name(**kwargs))
return callbacks
def get_examples_per_second_callback(
every_n_steps=1, batch_size=32, metric_logger=None, **kwargs): # pylint: disable=unused-argument
"""Function to get ExamplesPerSecondCallback."""
return ExamplesPerSecondCallback(
batch_size=batch_size, every_n_steps=every_n_steps,
metric_logger=metric_logger or logger.get_benchmark_logger())
def get_logging_metric_callback(metric_logger=None, **kwargs): # pylint: disable=unused-argument
"""Function to get LoggingMetricCallback."""
return LoggingMetricCallback(
metric_logger=metric_logger or logger.get_benchmark_logger())
# A dictionary to map the callback name and its corresponding function
CALLBACKS = {
"examplespersecondcallback": get_examples_per_second_callback,
"loggingmetriccallback": get_logging_metric_callback,
}