"vscode:/vscode.git/clone" did not exist on "06b1ec72efb8a7c8627996bd8562bc128e538fa0"
Unverified commit c5e107ff authored by Hongkun Yu, committed by GitHub

Remove benchmark folder from the master branch. They are stale. (#9085)

parent 266c7f43
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils to annotate and trace benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import flags
from absl import logging
from absl.testing import flagsaver
FLAGS = flags.FLAGS
flags.DEFINE_multi_string(
'benchmark_method_flags', None,
'Optional list of runtime flags of the form key=value. Specify '
'multiple times to specify different flags. These will override the FLAGS '
'object directly after hardcoded settings in individual benchmark methods '
'before they call _run_and_report_benchmark. For example, if we set '
'--benchmark_method_flags=train_steps=10 and a benchmark method hardcodes '
'FLAGS.train_steps=10000 and later calls _run_and_report_benchmark, '
'the benchmark will only run for 10 steps. This is useful for '
'debugging/profiling workflows.')
def enable_runtime_flags(decorated_func):
"""Sets attributes from --benchmark_method_flags for method execution.
The @enable_runtime_flags decorator temporarily applies the flags passed in via
--benchmark_method_flags and runs the decorated function in that context.
A user can set --benchmark_method_flags=train_steps=5 to run the benchmark
method in the snippet below with FLAGS.train_steps=5 for debugging (without
modifying the benchmark code).
class ModelBenchmark():
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self):
# run benchmark ...
# report benchmark results ...
def benchmark_method(self):
FLAGS.train_steps = 1000
...
self._run_and_report_benchmark()
Args:
decorated_func: The method that runs the benchmark after earlier setup code
has already set some flags.
Returns:
new_func: The same method which executes in a temporary context where flag
overrides from --benchmark_method_flags are active.
"""
def runner(*args, **kwargs):
"""Creates a temporary context to activate --benchmark_method_flags."""
if FLAGS.benchmark_method_flags:
saved_flag_values = flagsaver.save_flag_values()
for key_value in FLAGS.benchmark_method_flags:
key, value = key_value.split('=', 1)
try:
numeric_float = float(value)
numeric_int = int(numeric_float)
if abs(numeric_int) == abs(numeric_float):
flag_value = numeric_int
else:
flag_value = numeric_float
except ValueError:
flag_value = value
logging.info('Setting --%s=%s', key, flag_value)
setattr(FLAGS, key, flag_value)
else:
saved_flag_values = None
try:
result = decorated_func(*args, **kwargs)
return result
finally:
if saved_flag_values:
flagsaver.restore_flag_values(saved_flag_values)
return runner
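As a sketch of the value coercion performed inside `runner` above, the stand-alone helper below mirrors that logic; `_coerce` is a hypothetical name used only for illustration and is not part of the module.

def _coerce(value):
  """Mirrors runner's int/float/string coercion (illustrative sketch only)."""
  try:
    numeric_float = float(value)
    numeric_int = int(numeric_float)
    # Integer-valued numbers become ints, everything else stays a float.
    return numeric_int if abs(numeric_int) == abs(numeric_float) else numeric_float
  except ValueError:
    # Non-numeric values are passed through as strings.
    return value

assert _coerce('10') == 10        # --benchmark_method_flags=train_steps=10
assert _coerce('2e-5') == 2e-05   # --benchmark_method_flags=learning_rate=2e-5
assert _coerce('fp16') == 'fp16'  # --benchmark_method_flags=dtype=fp16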
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes BERT benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import json
import math
import os
import time
# pylint: disable=g-bad-import-order
from absl import flags
from absl.testing import flagsaver
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.benchmark import bert_benchmark_utils as benchmark_utils
from official.benchmark import owner_utils
from official.nlp.bert import configs
from official.nlp.bert import run_classifier
from official.utils.misc import distribution_utils
from official.benchmark import benchmark_wrappers
# pylint: disable=line-too-long
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt'
CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record'
CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record'
CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data'
MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json'
# pylint: enable=line-too-long
TMP_DIR = os.getenv('TMPDIR')
FLAGS = flags.FLAGS
class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
"""Base class to hold methods common to test classes in the module."""
def __init__(self, output_dir=None, tpu=None):
super(BertClassifyBenchmarkBase, self).__init__(output_dir, tpu=tpu)
self.num_epochs = None
self.num_steps_per_epoch = None
FLAGS.steps_per_loop = 1
@flagsaver.flagsaver
def _run_bert_classifier(self, callbacks=None, use_ds=True):
"""Starts BERT classification task."""
with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
input_meta_data = json.loads(reader.read().decode('utf-8'))
bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
epochs = self.num_epochs if self.num_epochs else FLAGS.num_train_epochs
if self.num_steps_per_epoch:
steps_per_epoch = self.num_steps_per_epoch
else:
train_data_size = input_meta_data['train_data_size']
steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
warmup_steps = int(epochs * steps_per_epoch * 0.1)
eval_steps = int(
math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))
if self.tpu:
strategy = distribution_utils.get_distribution_strategy(
distribution_strategy='tpu', tpu_address=self.tpu)
else:
strategy = distribution_utils.get_distribution_strategy(
distribution_strategy='mirrored' if use_ds else 'off',
num_gpus=self.num_gpus)
max_seq_length = input_meta_data['max_seq_length']
train_input_fn = run_classifier.get_dataset_fn(
FLAGS.train_data_path,
max_seq_length,
FLAGS.train_batch_size,
is_training=True)
eval_input_fn = run_classifier.get_dataset_fn(
FLAGS.eval_data_path,
max_seq_length,
FLAGS.eval_batch_size,
is_training=False)
_, summary = run_classifier.run_bert_classifier(
strategy,
bert_config,
input_meta_data,
FLAGS.model_dir,
epochs,
steps_per_epoch,
FLAGS.steps_per_loop,
eval_steps,
warmup_steps,
FLAGS.learning_rate,
FLAGS.init_checkpoint,
train_input_fn,
eval_input_fn,
training_callbacks=False,
custom_callbacks=callbacks)
return summary
class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
"""Short benchmark performance tests for BERT model.
Tests BERT classification performance in different GPU, TPU configurations.
The naming convention of the test cases below follows
`benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and
`benchmark_(topology)_tpu_(dataset type)` for TPUs.
"""
def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
super(BertClassifyBenchmarkReal, self).__init__(
output_dir=output_dir, tpu=tpu)
self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
self.bert_config_file = MODEL_CONFIG_FILE_PATH
self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
# Since we only care about performance metrics, we limit
# the number of training steps and epochs to prevent unnecessarily
# long tests.
self.num_steps_per_epoch = 100
self.num_epochs = 1
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
training_summary_path,
min_accuracy=0,
max_accuracy=1,
use_ds=True):
"""Starts BERT performance benchmark test."""
start_time_sec = time.time()
summary = self._run_bert_classifier(
callbacks=[self.timer_callback], use_ds=use_ds)
wall_time_sec = time.time() - start_time_sec
# Since we do not load from any pretrained checkpoints, we ignore all
# accuracy metrics.
summary.pop('eval_metrics', None)
summary['start_time_sec'] = start_time_sec
super(BertClassifyBenchmarkReal, self)._report_benchmark(
stats=summary,
wall_time_sec=wall_time_sec,
min_accuracy=min_accuracy,
max_accuracy=max_accuracy)
def benchmark_1_gpu_mrpc(self):
"""Test BERT model performance with 1 GPU."""
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 4
FLAGS.eval_batch_size = 4
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
def benchmark_1_gpu_mrpc_xla(self):
"""Test BERT model performance with 1 GPU."""
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_xla')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 4
FLAGS.eval_batch_size = 4
FLAGS.enable_xla = True
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
def benchmark_1_gpu_mrpc_no_dist_strat(self):
"""Test BERT model performance with 1 GPU, no distribution strategy."""
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_no_dist_strat')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 4
FLAGS.eval_batch_size = 4
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
@owner_utils.Owner('tf-model-garden')
def benchmark_8_gpu_mrpc(self):
"""Test BERT model performance with 8 GPUs."""
self._setup()
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
def benchmark_1_gpu_amp_mrpc_no_dist_strat(self):
"""Performance for 1 GPU no DS with automatic mixed precision."""
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_amp_mrpc_no_dist_strat')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 4
FLAGS.eval_batch_size = 4
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
def benchmark_8_gpu_amp_mrpc(self):
"""Test BERT model performance with 8 GPUs with automatic mixed precision."""
self._setup()
self.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_mrpc')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 32
FLAGS.eval_batch_size = 32
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
@owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu_mrpc(self):
"""Test BERT model performance with 2x2 TPU."""
self._setup()
FLAGS.steps_per_loop = 50
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 32
FLAGS.eval_batch_size = 32
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
class BertClassifyAccuracy(BertClassifyBenchmarkBase):
"""Short accuracy test for BERT model.
Tests BERT classification model accuracy. The naming
convention of the test cases below follows the
`benchmark_(number of gpus)_gpu_(dataset type)` format.
"""
def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
self.bert_config_file = MODEL_CONFIG_FILE_PATH
self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH
super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
training_summary_path,
min_accuracy=0.84,
max_accuracy=0.88):
"""Starts BERT accuracy benchmark test."""
start_time_sec = time.time()
summary = self._run_bert_classifier(callbacks=[self.timer_callback])
wall_time_sec = time.time() - start_time_sec
super(BertClassifyAccuracy, self)._report_benchmark(
stats=summary,
wall_time_sec=wall_time_sec,
min_accuracy=min_accuracy,
max_accuracy=max_accuracy)
def _setup(self):
super(BertClassifyAccuracy, self)._setup()
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.init_checkpoint = self.pretrained_checkpoint_path
@owner_utils.Owner('tf-model-garden')
def benchmark_8_gpu_mrpc(self):
"""Run BERT model accuracy test with 8 GPUs.
Due to the comparatively small size of the MRPC dataset, the training
accuracy metric has high variance between runs. We therefore allow a
wide accuracy range (84% to 88%).
"""
self._setup()
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
def benchmark_8_gpu_mrpc_xla(self):
"""Run BERT model accuracy test with 8 GPUs with XLA."""
self._setup()
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
FLAGS.enable_xla = True
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
@owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu_mrpc(self):
"""Run BERT model accuracy test on 2x2 TPU."""
self._setup()
FLAGS.steps_per_loop = 50
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
if __name__ == '__main__':
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions or classes shared between BERT benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
# pylint: disable=g-bad-import-order
import numpy as np
from absl import flags
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.utils.flags import core as flags_core
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
FLAGS = flags.FLAGS
class BenchmarkTimerCallback(tf.keras.callbacks.Callback):
"""Callback that records time it takes to run each batch."""
def __init__(self, num_batches_to_skip=10):
super(BenchmarkTimerCallback, self).__init__()
self.batch_start_times = {}
self.batch_stop_times = {}
def on_batch_begin(self, batch, logs=None):
self.batch_start_times[batch] = time.time()
def on_batch_end(self, batch, logs=None):
# If steps_per_loop is greater than 1, the batch index seen at on_batch_end
# will not match the index recorded at on_batch_begin. Use the last recorded
# starting index instead.
if batch not in self.batch_start_times:
batch = max(self.batch_start_times.keys())
self.batch_stop_times[batch] = time.time()
def get_examples_per_sec(self, batch_size, num_batches_to_skip=1):
batch_durations = []
for batch in self.batch_start_times:
if batch in self.batch_stop_times and batch >= num_batches_to_skip:
batch_durations.append(self.batch_stop_times[batch] -
self.batch_start_times[batch])
return batch_size / np.mean(batch_durations)
def get_startup_time(self, program_start_time):
return self.batch_start_times[0] - program_start_time
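# The following is a minimal, self-contained usage sketch of
# BenchmarkTimerCallback (an editorial illustration; `_demo_timer_callback` is
# a hypothetical helper and not part of the original module). It trains a tiny
# Keras model on synthetic data and prints the measured examples/sec; it is
# defined but never called, so module behavior is unchanged.
def _demo_timer_callback():
  model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
  model.compile(optimizer='sgd', loss='mse')
  timer = BenchmarkTimerCallback()
  features = np.random.rand(640, 4).astype('float32')
  labels = np.random.rand(640, 1).astype('float32')
  # 640 examples / batch_size 32 -> 20 batches; batch 0 is skipped by default.
  model.fit(features, labels, batch_size=32, epochs=1, callbacks=[timer],
            verbose=0)
  print('examples/sec:', timer.get_examples_per_sec(batch_size=32))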
class BertBenchmarkBase(PerfZeroBenchmark):
"""Base class to hold methods common to test classes."""
local_flags = None
def __init__(self, output_dir=None, tpu=None, **kwargs):
super(BertBenchmarkBase, self).__init__(
output_dir=output_dir, tpu=tpu, **kwargs)
self.num_gpus = 8
self.timer_callback = None
def _setup(self):
"""Sets up and resets flags before each test."""
super(BertBenchmarkBase, self)._setup()
self.timer_callback = BenchmarkTimerCallback()
def _report_benchmark(self, stats, wall_time_sec, min_accuracy, max_accuracy):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from BERT models with known entries.
wall_time_sec: the duration of the benchmark execution in seconds.
min_accuracy: Minimum classification accuracy constraint to verify
correctness of the model.
max_accuracy: Maximum classification accuracy constraint to verify
correctness of the model.
"""
metrics = [{
'name': 'training_loss',
'value': stats['train_loss'],
}]
if self.timer_callback:
metrics.append({
'name':
'exp_per_second',
'value':
self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size *
FLAGS.steps_per_loop)
})
else:
metrics.append({
'name': 'exp_per_second',
'value': 0.0,
})
if self.timer_callback and 'start_time_sec' in stats:
metrics.append({
'name': 'startup_time',
'value': self.timer_callback.get_startup_time(stats['start_time_sec'])
})
if 'eval_metrics' in stats:
metrics.append({
'name': 'eval_accuracy',
'value': stats['eval_metrics'],
'min_value': min_accuracy,
'max_value': max_accuracy,
})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=stats['total_training_steps'],
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
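For orientation, a hypothetical `stats` dict accepted by `_report_benchmark` above might look like the following; the keys are inferred from the code and the values are purely illustrative.

example_stats = {
    'train_loss': 0.35,              # reported as the training_loss metric
    'eval_metrics': 0.86,            # optional; range-checked as eval_accuracy
    'start_time_sec': 1596240000.0,  # optional; used to compute startup_time
    'total_training_steps': 100,     # reported as iters
}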
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes benchmark testing for bert pretraining."""
# pylint: disable=line-too-long
from __future__ import print_function
import json
import os
import time
from typing import Optional
from absl import flags
from absl import logging
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.benchmark import benchmark_wrappers
from official.benchmark import bert_benchmark_utils
from official.benchmark import owner_utils
from official.nlp.bert import run_pretraining
from official.utils.flags import core as flags_core
from official.utils.misc import distribution_utils
# Pretraining masked language modeling accuracy range:
MIN_MLM_ACCURACY = 0.635
MAX_MLM_ACCURACY = 0.645
# Pretrain next sentence prediction accuracy range:
MIN_NSP_ACCURACY = 0.94
MAX_NSP_ACCURACY = 0.96
BERT_PRETRAIN_FILES_SEQ128 = 'gs://mlcompass-data/bert/pretraining_data/seq_128/wikipedia.tfrecord*,gs://mlcompass-data/bert/pretraining_data/seq_128/books.tfrecord*'
BERT_BASE_CONFIG_FILE = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12/bert_config.json'
FLAGS = flags.FLAGS
class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
"""Benchmark accuracy tests for BERT Pretraining."""
def __init__(self,
output_dir: Optional[str] = None,
tpu: Optional[str] = None,
**kwargs):
"""Inits BertPretrainAccuracyBenchmark class.
Args:
output_dir: Directory where outputs, e.g. log files, are written.
tpu: TPU name to use in a TPU benchmark.
**kwargs: Additional keyword arguments.
"""
super(BertPretrainAccuracyBenchmark, self).__init__(
output_dir=output_dir, tpu=tpu, **kwargs)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self, summary_path: str, report_accuracy: bool):
"""Runs and reports the benchmark given the provided configuration."""
distribution = distribution_utils.get_distribution_strategy(
distribution_strategy='tpu', tpu_address=self.tpu)
logging.info('Flags: %s', flags_core.get_nondefault_flags_as_str())
start_time_sec = time.time()
run_pretraining.run_bert_pretrain(
strategy=distribution, custom_callbacks=self.timer_callback)
wall_time_sec = time.time() - start_time_sec
with tf.io.gfile.GFile(summary_path, 'rb') as reader:
summary = json.loads(reader.read().decode('utf-8'))
self._report_benchmark(summary, start_time_sec, wall_time_sec,
report_accuracy)
def _report_benchmark(self, summary, start_time_sec, wall_time_sec,
report_accuracy):
metrics = [{
'name': 'train_loss',
'value': summary['train_loss'],
}, {
'name':
'exp_per_second',
'value':
self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size *
FLAGS.steps_per_loop)
}, {
'name': 'startup_time',
'value': self.timer_callback.get_startup_time(start_time_sec)
}]
if report_accuracy:
metrics.extend([{
'name': 'masked_lm_accuracy',
'value': summary['masked_lm_accuracy'],
'min_value': MIN_MLM_ACCURACY,
'max_value': MAX_MLM_ACCURACY,
}, {
'name': 'next_sentence_accuracy',
'value': summary['next_sentence_accuracy'],
'min_value': MIN_NSP_ACCURACY,
'max_value': MAX_NSP_ACCURACY,
}])
self.report_benchmark(
iters=summary['total_training_steps'],
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_core.get_nondefault_flags_as_str()})
def _specify_common_flags(self):
FLAGS.bert_config_file = BERT_BASE_CONFIG_FILE
FLAGS.train_batch_size = 512
FLAGS.learning_rate = 1e-4
FLAGS.warmup_steps = 10000
FLAGS.steps_per_loop = 10000
FLAGS.distribution_strategy = 'tpu'
FLAGS.input_files = BERT_PRETRAIN_FILES_SEQ128
FLAGS.max_seq_length = 128
FLAGS.max_predictions_per_seq = 20
FLAGS.dtype = 'bf16'
@owner_utils.Owner('tf-model-garden')
def benchmark_accuracy_8x8_tpu_bf16_seq128_500k_steps(self):
"""Test bert pretraining with 8x8 TPU for 500k steps."""
# This is used for accuracy test.
self._setup()
self._specify_common_flags()
FLAGS.num_steps_per_epoch = 500000
FLAGS.num_train_epochs = 1
FLAGS.model_dir = self._get_model_dir(
'benchmark_accuracy_8x8_tpu_bf16_seq128_500k_steps')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
# Set train_summary_interval to -1 to disable training summary, because
# writing summary to gcs may fail and summaries are not needed for this
# accuracy benchmark test.
FLAGS.train_summary_interval = -1
self._run_and_report_benchmark(summary_path=summary_path,
report_accuracy=True)
@owner_utils.Owner('tf-model-garden')
def benchmark_perf_2x2_tpu_bf16_seq128_10k_steps(self):
"""Test bert pretraining with 2x2 TPU for 10000 steps."""
self._setup()
self._specify_common_flags()
FLAGS.num_steps_per_epoch = 5000
FLAGS.num_train_epochs = 2
FLAGS.train_batch_size = 128
FLAGS.model_dir = self._get_model_dir(
'benchmark_perf_2x2_tpu_bf16_seq128_10k_steps')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
# Disable accuracy check.
self._run_and_report_benchmark(
summary_path=summary_path, report_accuracy=False)
@owner_utils.Owner('tf-model-garden')
def benchmark_perf_2x2_tpu_bf16_seq128_10k_steps_mlir(self):
"""Test bert pretraining with 2x2 TPU with MLIR for 10000 steps."""
self._setup()
self._specify_common_flags()
FLAGS.num_steps_per_epoch = 5000
FLAGS.num_train_epochs = 2
FLAGS.train_batch_size = 128
FLAGS.model_dir = self._get_model_dir(
'benchmark_perf_2x2_tpu_bf16_seq128_10k_steps_mlir')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
tf.config.experimental.enable_mlir_bridge()
# Disable accuracy check.
self._run_and_report_benchmark(
summary_path=summary_path, report_accuracy=False)
@owner_utils.Owner('tf-model-garden')
def benchmark_perf_4x4_tpu_bf16_seq128_10k_steps(self):
"""Test bert pretraining with 4x4 TPU for 10000 steps."""
self._setup()
self._specify_common_flags()
FLAGS.num_steps_per_epoch = 5000
FLAGS.num_train_epochs = 2
FLAGS.model_dir = self._get_model_dir(
'benchmark_perf_4x4_tpu_bf16_seq128_10k_steps')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
# Disable accuracy check.
self._run_and_report_benchmark(
summary_path=summary_path, report_accuracy=False)
@owner_utils.Owner('tf-model-garden')
def benchmark_perf_4x4_tpu_bf16_seq128_10k_steps_mlir(self):
"""Test bert pretraining with 4x4 TPU with MLIR for 10000 steps."""
self._setup()
self._specify_common_flags()
FLAGS.num_steps_per_epoch = 5000
FLAGS.num_train_epochs = 2
FLAGS.model_dir = self._get_model_dir(
'benchmark_perf_4x4_tpu_bf16_seq128_10k_steps_mlir')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
tf.config.experimental.enable_mlir_bridge()
# Disable accuracy check.
self._run_and_report_benchmark(
summary_path=summary_path, report_accuracy=False)
@owner_utils.Owner('tf-model-garden')
def benchmark_perf_8x8_tpu_bf16_seq128_10k_steps(self):
"""Test bert pretraining with 8x8 TPU for 10000 steps."""
self._setup()
self._specify_common_flags()
FLAGS.num_steps_per_epoch = 5000
FLAGS.num_train_epochs = 2
FLAGS.model_dir = self._get_model_dir(
'benchmark_perf_8x8_tpu_bf16_seq128_10k_steps')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
# Disable accuracy check.
self._run_and_report_benchmark(summary_path=summary_path,
report_accuracy=False)
if __name__ == '__main__':
tf.test.main()
[
{
"description": "The ID of the benchmark run, where this metric should tie to.",
"mode": "REQUIRED",
"name": "run_id",
"type": "STRING"
},
{
"description": "The name of the metric, which should be descriptive. E.g. training_loss, accuracy.",
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"description": "The unit of the metric. E.g. MB per sec.",
"mode": "NULLABLE",
"name": "unit",
"type": "STRING"
},
{
"description": "The value of the metric.",
"mode": "NULLABLE",
"name": "value",
"type": "FLOAT"
},
{
"description": "The timestamp when the metric is recorded.",
"mode": "REQUIRED",
"name": "timestamp",
"type": "TIMESTAMP"
},
{
"description": "The global step when this metric is recorded.",
"mode": "NULLABLE",
"name": "global_step",
"type": "INTEGER"
},
{
"description": "Free format metadata for the extra information about the metric.",
"mode": "REPEATED",
"name": "extras",
"type": "RECORD",
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "value",
"type": "STRING"
}
]
}
]
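For reference, a hypothetical row conforming to the metric schema above could look like the following Python dict (all values are made up):

example_metric_row = {
    'run_id': '0f8fad5b-d9cb-469f-a165-70867728950e',
    'name': 'exp_per_second',
    'unit': 'examples per second',
    'value': 1234.5,
    'timestamp': '2020-08-01 00:00:00 UTC',
    'global_step': 10000,
    'extras': [{'name': 'flags', 'value': '--train_batch_size=32'}],
}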
[
{
"description": "The UUID of the run for the benchmark.",
"mode": "REQUIRED",
"name": "model_id",
"type": "STRING"
},
{
"description": "The name of the model, E.g ResNet50, LeNet-5 etc.",
"mode": "REQUIRED",
"name": "model_name",
"type": "STRING"
},
{
"description": "The date when the test of the model is started",
"mode": "REQUIRED",
"name": "run_date",
"type": "TIMESTAMP"
},
{
"description": "The unique name for a test by the combination of key parameters, eg batch size, num of GPU, etc. It is hardware independent.",
"mode": "NULLABLE",
"name": "test_id",
"type": "STRING"
},
{
"description": "The tensorflow version information.",
"fields": [
{
"description": "Version of the tensorflow. E.g. 1.7.0-rc0",
"mode": "REQUIRED",
"name": "version",
"type": "STRING"
},
{
"description": "Git Hash of the tensorflow",
"mode": "NULLABLE",
"name": "git_hash",
"type": "STRING"
},
{
"description": "The channel of the tensorflow binary, eg, nightly, RC, final, custom.",
"mode": "NULLABLE",
"name": "channel",
"type": "STRING"
},
{
"description": "Identify anything special about the build, eg CUDA 10, NCCL, MKL, etc.",
"mode": "NULLABLE",
"name": "build_type",
"type": "STRING"
}
],
"mode": "REQUIRED",
"name": "tensorflow_version",
"type": "RECORD"
},
{
"description": "The arbitrary attribute of the model.",
"fields": [
{
"description": "The name of the attribute.",
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"description": "The value of the attribute.",
"mode": "NULLABLE",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "attribute",
"type": "RECORD"
},
{
"description": "Environment variables when the benchmark run is executed.",
"fields": [
{
"description": "The name of the variable.",
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"description": "The value of the variable.",
"mode": "NULLABLE",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "environment_variable",
"type": "RECORD"
},
{
"description": "TF Environment variables when the benchmark run is executed.",
"fields": [
{
"description": "The name of the variable.",
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"description": "The value of the variable.",
"mode": "NULLABLE",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "tensorflow_environment_variables",
"type": "RECORD"
},
{
"description": "The list of parameters run with the model. It could contain hyperparameters or others.",
"fields": [
{
"description": "The name of the parameter.",
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"description": "The string value of the parameter.",
"mode": "NULLABLE",
"name": "string_value",
"type": "STRING"
},
{
"description": "The bool value of the parameter.",
"mode": "NULLABLE",
"name": "bool_value",
"type": "STRING"
},
{
"description": "The int/long value of the parameter.",
"mode": "NULLABLE",
"name": "long_value",
"type": "INTEGER"
},
{
"description": "The double/float value of parameter.",
"mode": "NULLABLE",
"name": "float_value",
"type": "FLOAT"
}
],
"mode": "REPEATED",
"name": "run_parameters",
"type": "RECORD"
},
{
"description": "The dataset that run with the benchmark.",
"mode": "NULLABLE",
"name": "dataset",
"type": "RECORD",
"fields": [
{
"description": "The name of the dataset that the model is trained/validated with. E.g ImageNet, mnist.",
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"description": "The arbitrary attribute of the dataset.",
"fields": [
{
"description": "The name of the attribute.",
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"description": "The value of the attribute.",
"mode": "NULLABLE",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "attribute",
"type": "RECORD"
}
]
},
{
"description": "Used to differentiate from AWS, GCE or DGX-1 at a high level",
"mode": "NULLABLE",
"name": "test_environment",
"type": "STRING"
},
{
"description": "The machine configuration of the benchmark run.",
"mode": "NULLABLE",
"name": "machine_config",
"type": "RECORD",
"fields": [
{
"description": "The platform information of the benchmark run.",
"mode": "NULLABLE",
"name": "platform_info",
"type": "RECORD",
"fields": [
{
"description": "Eg: 64bit.",
"mode": "NULLABLE",
"name": "bits",
"type": "STRING"
},
{
"description": "Eg: ELF.",
"mode": "NULLABLE",
"name": "linkage",
"type": "STRING"
},
{
"description": "Eg: i386.",
"mode": "NULLABLE",
"name": "machine",
"type": "STRING"
},
{
"description": "Eg: 3.13.0-76-generic.",
"mode": "NULLABLE",
"name": "release",
"type": "STRING"
},
{
"description": "Eg: Linux.",
"mode": "NULLABLE",
"name": "system",
"type": "STRING"
},
{
"description": "Eg: #120-Ubuntu SMP Mon Jan 18 15:59:10 UTC 2016.",
"mode": "NULLABLE",
"name": "version",
"type": "STRING"
}
]
},
{
"description": "The CPU information of the benchmark run.",
"mode": "NULLABLE",
"name": "cpu_info",
"type": "RECORD",
"fields": [
{
"mode": "NULLABLE",
"name": "num_cores",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "num_cores_allowed",
"type": "INTEGER"
},
{
"description" : "How fast are those CPUs.",
"mode": "NULLABLE",
"name": "mhz_per_cpu",
"type": "FLOAT"
},
{
"description" : "Additional CPU info, Eg: Intel Ivybridge with HyperThreading (24 cores).",
"mode": "NULLABLE",
"name": "cpu_info",
"type": "STRING"
},
{
"description" : "What kind of cpu scaling is enabled on the host. Eg performance, ondemand, conservative, mixed.",
"mode": "NULLABLE",
"name": "cpu_governor",
"type": "STRING"
},
{
"description": "Cache size of the CPUs.",
"mode": "NULLABLE",
"name": "cache_size",
"type": "RECORD",
"fields": [
{
"mode": "NULLABLE",
"name": "level",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "size",
"type": "INTEGER"
}
]
}
]
},
{
"mode": "NULLABLE",
"name": "gpu_info",
"type": "RECORD",
"fields": [
{
"mode": "NULLABLE",
"name": "count",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "model",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "cuda_version",
"type": "STRING"
}
]
},
{
"description": "The cloud instance inforation if the benchmark run is executed on cloud",
"mode": "NULLABLE",
"name": "cloud_info",
"type": "RECORD",
"fields": [
{
"description": "The instance type, E.g. n1-standard-4.",
"mode": "NULLABLE",
"name": "instance_type",
"type": "STRING"
},
{
"description": "The arbitrary attribute of the cloud info.",
"fields": [
{
"description": "The name of the attribute.",
"mode": "REQUIRED",
"name": "name",
"type": "STRING"
},
{
"description": "The value of the attribute.",
"mode": "NULLABLE",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "attribute",
"type": "RECORD"
}
]
},
{
"mode": "NULLABLE",
"name": "memory_total",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "memory_available",
"type": "STRING"
}
]
}
]
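Similarly, a hypothetical (and deliberately partial) benchmark_run row for the schema above might look like this; every value is illustrative.

example_benchmark_run_row = {
    'model_id': '0f8fad5b-d9cb-469f-a165-70867728950e',
    'model_name': 'bert',
    'run_date': '2020-08-01 00:00:00 UTC',
    'test_id': 'benchmark_8_gpu_mrpc',
    'tensorflow_version': {'version': '2.3.0', 'channel': 'nightly'},
    'run_parameters': [{'name': 'train_batch_size', 'long_value': 32}],
    'machine_config': {'gpu_info': {'count': 8, 'model': 'V100',
                                    'cuda_version': '10.1'}},
    'test_environment': 'GCE',
}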
[
{
"description": "The UUID of the run for the benchmark.",
"mode": "REQUIRED",
"name": "run_id",
"type": "STRING"
},
{
"description": "The status of the run for the benchmark. Eg, running, failed, success",
"mode": "REQUIRED",
"name": "status",
"type": "STRING"
}
]
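And a hypothetical run status row for the schema above (values illustrative):

example_run_status_row = {
    'run_id': '0f8fad5b-d9cb-469f-a165-70867728950e',
    'status': 'success',
}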
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
from official.utils.flags import core as flags_core
class KerasBenchmark(PerfZeroBenchmark):
"""Base benchmark class with methods to simplify testing."""
def __init__(self,
output_dir=None,
default_flags=None,
flag_methods=None,
tpu=None):
super(KerasBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
flag_methods=flag_methods,
tpu=tpu)
def _report_benchmark(self,
stats,
wall_time_sec,
top_1_max=None,
top_1_min=None,
log_steps=None,
total_batch_size=None,
warmup=1,
start_time_sec=None):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from keras models with known entries.
wall_time_sec: the duration of the benchmark execution in seconds.
top_1_max: highest passing level for top_1 accuracy.
top_1_min: lowest passing level for top_1 accuracy.
log_steps: interval (in steps) at which entries in
stats['step_timestamp_log'] were recorded.
total_batch_size: Global batch size.
warmup: number of entries in stats['step_timestamp_log'] to ignore.
start_time_sec: the start time of the program in seconds since the epoch.
"""
metrics = []
if 'accuracy_top_1' in stats:
metrics.append({'name': 'accuracy_top_1',
'value': stats['accuracy_top_1'],
'min_value': top_1_min,
'max_value': top_1_max})
metrics.append({'name': 'top_1_train_accuracy',
'value': stats['training_accuracy_top_1']})
if (warmup and 'step_timestamp_log' in stats and
len(stats['step_timestamp_log']) > warmup):
# The first entry in time_log is the start of step 1. The remaining entries
# are the recorded end times of each logged step.
time_log = stats['step_timestamp_log']
elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
num_examples = (
total_batch_size * log_steps * (len(time_log) - warmup - 1))
examples_per_sec = num_examples / elapsed
metrics.append({'name': 'exp_per_second',
'value': examples_per_sec})
if 'avg_exp_per_second' in stats:
metrics.append({'name': 'avg_exp_per_second',
'value': stats['avg_exp_per_second']})
if start_time_sec and 'step_timestamp_log' in stats:
time_log = stats['step_timestamp_log']
# time_log[0] is recorded at the beginning of the first step.
startup_time = time_log[0].timestamp - start_time_sec
metrics.append({'name': 'startup_time', 'value': startup_time})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=-1,
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
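To make the throughput computation in `_report_benchmark` above concrete, here is a worked example under assumed values (all numbers illustrative):

# total_batch_size = 128, log_steps = 100, warmup = 1
# stats['step_timestamp_log'] has 12 entries
# elapsed        = time_log[-1].timestamp - time_log[1].timestamp
# num_examples   = 128 * 100 * (12 - 1 - 1) = 128000
# exp_per_second = 128000 / elapsed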
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
from absl import flags
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.benchmark import keras_benchmark
from official.benchmark import benchmark_wrappers
from official.benchmark.models import resnet_cifar_main
MIN_TOP_1_ACCURACY = 0.929
MAX_TOP_1_ACCURACY = 0.938
FLAGS = flags.FLAGS
CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin'
class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
"""Accuracy tests for ResNet56 Keras CIFAR-10."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""A benchmark class.
Args:
output_dir: directory where outputs such as log files are written.
root_data_dir: directory under which to look for the dataset.
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)
flag_methods = [resnet_cifar_main.define_cifar_flags]
super(Resnet56KerasAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def _setup(self):
super(Resnet56KerasAccuracy, self)._setup()
FLAGS.use_tensor_lr = False
def benchmark_graph_1_gpu(self):
"""Test keras based model with Keras fit and distribution strategies."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
"""Test keras based model with eager and distribution strategies."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
self._run_and_report_benchmark()
def benchmark_cpu(self):
"""Test keras based model on CPU."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_cpu')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat(self):
"""Test keras based model on CPU without distribution strategies."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_run_eagerly(self):
"""Test keras based model on CPU w/forced eager and no dist_strat."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_cpu_no_dist_strat_run_eagerly')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat(self):
"""Test keras based model with eager and no dist strat."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
"""Test keras based model w/forced eager and no dist_strat."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_run_eagerly')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_graph_1_gpu_no_dist_strat(self):
"""Test keras based model with Keras fit but not distribution strategies."""
self._setup()
FLAGS.distribution_strategy = 'off'
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def benchmark_2_gpu(self):
"""Test keras based model with eager and distribution strategies."""
self._setup()
FLAGS.num_gpus = 2
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
self._run_and_report_benchmark()
def benchmark_graph_2_gpu(self):
"""Test keras based model with Keras fit and distribution strategies."""
self._setup()
FLAGS.num_gpus = 2
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = resnet_cifar_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
super(Resnet56KerasAccuracy, self)._report_benchmark(
stats,
wall_time_sec,
top_1_min=MIN_TOP_1_ACCURACY,
top_1_max=MAX_TOP_1_ACCURACY,
total_batch_size=FLAGS.batch_size,
log_steps=100)
class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
"""Short performance tests for ResNet56 via Keras and CIFAR-10."""
def __init__(self, output_dir=None, default_flags=None):
flag_methods = [resnet_cifar_main.define_cifar_flags]
super(Resnet56KerasBenchmarkBase, self).__init__(
output_dir=output_dir,
flag_methods=flag_methods,
default_flags=default_flags)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = resnet_cifar_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
super(Resnet56KerasBenchmarkBase, self)._report_benchmark(
stats,
wall_time_sec,
total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_1_gpu(self):
"""Test 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_1_gpu_xla(self):
"""Test 1 gpu with xla enabled."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.run_eagerly = False
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_graph_1_gpu(self):
"""Test 1 gpu graph."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = False
FLAGS.run_eagerly = False
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat(self):
"""Test 1 gpu without distribution strategies."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_graph_1_gpu_no_dist_strat(self):
"""Test 1 gpu graph mode without distribution strategies."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = False
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
"""Test 1 gpu without distribution strategy and forced eager."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 128
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_run_eagerly')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_2_gpu(self):
"""Test 2 gpu."""
self._setup()
FLAGS.num_gpus = 2
FLAGS.enable_eager = True
FLAGS.run_eagerly = False
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
FLAGS.batch_size = 128 * 2 # 2 GPUs
self._run_and_report_benchmark()
def benchmark_graph_2_gpu(self):
"""Test 2 gpu graph mode."""
self._setup()
FLAGS.num_gpus = 2
FLAGS.enable_eager = False
FLAGS.run_eagerly = False
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
FLAGS.batch_size = 128 * 2 # 2 GPUs
self._run_and_report_benchmark()
def benchmark_cpu(self):
"""Test cpu."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.enable_eager = True
FLAGS.model_dir = self._get_model_dir('benchmark_cpu')
FLAGS.batch_size = 128
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_graph_cpu(self):
"""Test cpu graph mode."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.enable_eager = False
FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu')
FLAGS.batch_size = 128
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_run_eagerly(self):
"""Test cpu without distribution strategy and forced eager."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.distribution_strategy = 'off'
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.model_dir = self._get_model_dir(
'benchmark_cpu_no_dist_strat_run_eagerly')
FLAGS.batch_size = 128
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat(self):
"""Test cpu without distribution strategies."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat')
FLAGS.batch_size = 128
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_graph_cpu_no_dist_strat(self):
"""Test cpu graph mode without distribution strategies."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.enable_eager = False
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu_no_dist_strat')
FLAGS.batch_size = 128
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
class Resnet56KerasBenchmarkSynth(Resnet56KerasBenchmarkBase):
"""Synthetic benchmarks for ResNet56 and Keras."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
default_flags = {}
default_flags['skip_eval'] = True
default_flags['use_synthetic_data'] = True
default_flags['train_steps'] = 110
default_flags['log_steps'] = 10
default_flags['use_tensor_lr'] = False
super(Resnet56KerasBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=default_flags)
class Resnet56KerasBenchmarkReal(Resnet56KerasBenchmarkBase):
"""Real data benchmarks for ResNet56 and Keras."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
default_flags = {}
default_flags['skip_eval'] = True
default_flags['data_dir'] = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)
default_flags['train_steps'] = 110
default_flags['log_steps'] = 10
default_flags['use_tensor_lr'] = False
super(Resnet56KerasBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=default_flags)
if __name__ == '__main__':
tf.test.main()
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to Cifar-10 dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import logging
import tensorflow as tf
from official.vision.image_classification.resnet import imagenet_preprocessing
HEIGHT = 32
WIDTH = 32
NUM_CHANNELS = 3
_DEFAULT_IMAGE_BYTES = HEIGHT * WIDTH * NUM_CHANNELS
# The record is the image plus a one-byte label
_RECORD_BYTES = _DEFAULT_IMAGE_BYTES + 1
# TODO(tobyboyd): Change to best practice 45K(train)/5K(val)/10K(test) splits.
NUM_IMAGES = {
'train': 50000,
'validation': 10000,
}
_NUM_DATA_FILES = 5
NUM_CLASSES = 10
def parse_record(raw_record, is_training, dtype):
"""Parses a record containing a training example of an image.
The input record is parsed into a label and image, and the image is passed
through preprocessing steps (cropping, flipping, and so on).
This method converts the label to one hot to fit the loss function.
Args:
raw_record: scalar Tensor tf.string containing a serialized
Example protocol buffer.
is_training: A boolean denoting whether the input is for training.
dtype: Data type to use for input images.
Returns:
Tuple with processed image tensor and one-hot-encoded label tensor.
"""
# Convert bytes to a vector of uint8 that is record_bytes long.
record_vector = tf.io.decode_raw(raw_record, tf.uint8)
# The first byte represents the label, which we convert from uint8 to int32
# and then to one-hot.
label = tf.cast(record_vector[0], tf.int32)
# The remaining bytes after the label represent the image, which we reshape
# from [depth * height * width] to [depth, height, width].
depth_major = tf.reshape(record_vector[1:_RECORD_BYTES],
[NUM_CHANNELS, HEIGHT, WIDTH])
# Convert from [depth, height, width] to [height, width, depth], and cast as
# float32.
image = tf.cast(tf.transpose(a=depth_major, perm=[1, 2, 0]), tf.float32)
image = preprocess_image(image, is_training)
image = tf.cast(image, dtype)
return image, label
def preprocess_image(image, is_training):
"""Preprocess a single image of layout [height, width, depth]."""
if is_training:
# Resize the image to add four extra pixels on each side.
image = tf.image.resize_with_crop_or_pad(
image, HEIGHT + 8, WIDTH + 8)
# Randomly crop a [HEIGHT, WIDTH] section of the image.
image = tf.image.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])
# Randomly flip the image horizontally.
image = tf.image.random_flip_left_right(image)
# Subtract off the mean and divide by the variance of the pixels.
image = tf.image.per_image_standardization(image)
return image
def get_filenames(is_training, data_dir):
"""Returns a list of filenames."""
assert tf.io.gfile.exists(data_dir), (
'Run cifar10_download_and_extract.py first to download and extract the '
'CIFAR-10 data.')
if is_training:
return [
os.path.join(data_dir, 'data_batch_%d.bin' % i)
for i in range(1, _NUM_DATA_FILES + 1)
]
else:
return [os.path.join(data_dir, 'test_batch.bin')]
def input_fn(is_training,
data_dir,
batch_size,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
input_context=None,
drop_remainder=False):
"""Input function which provides batches for train or eval.
Args:
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
input_context: A `tf.distribute.InputContext` object passed in by
`tf.distribute.Strategy`.
drop_remainder: A boolean indicating whether to drop the remainder of the
batches. If True, the batch dimension will be static.
Returns:
A dataset that can be used for iteration.
"""
filenames = get_filenames(is_training, data_dir)
dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)
if input_context:
logging.info(
'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
input_context.input_pipeline_id, input_context.num_input_pipelines)
dataset = dataset.shard(input_context.num_input_pipelines,
input_context.input_pipeline_id)
return imagenet_preprocessing.process_record_dataset(
dataset=dataset,
is_training=is_training,
batch_size=batch_size,
shuffle_buffer=NUM_IMAGES['train'],
parse_record_fn=parse_record_fn,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder
)
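A minimal usage sketch of `input_fn` above; the data path below is hypothetical and assumes the extracted CIFAR-10 binaries are already present.

train_dataset = input_fn(
    is_training=True,
    data_dir='/tmp/cifar-10-batches-bin',  # hypothetical local path
    batch_size=128)
images, labels = next(iter(train_dataset))  # one batch of preprocessed images and labels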
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the Cifar-10 dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl import app
from absl import flags
from absl import logging
import numpy as np
import tensorflow as tf
from official.benchmark.models import cifar_preprocessing
from official.benchmark.models import resnet_cifar_model
from official.benchmark.models import synthetic_util
from official.utils.flags import core as flags_core
from official.utils.misc import distribution_utils
from official.utils.misc import keras_utils
from official.vision.image_classification.resnet import common
LR_SCHEDULE = [ # (multiplier, epoch to start) tuples
(0.1, 91), (0.01, 136), (0.001, 182)
]
def learning_rate_schedule(current_epoch,
current_batch,
batches_per_epoch,
batch_size):
"""Handles linear scaling rule and LR decay.
Scale learning rate at epoch boundaries provided in LR_SCHEDULE by the
provided scaling factor.
Args:
current_epoch: integer, current epoch indexed from 0.
current_batch: integer, current batch in the current epoch, indexed from 0.
batches_per_epoch: integer, number of steps in an epoch.
    batch_size: integer, total batch size.
Returns:
Adjusted learning rate.
"""
del current_batch, batches_per_epoch # not used
initial_learning_rate = common.BASE_LEARNING_RATE * batch_size / 128
learning_rate = initial_learning_rate
for mult, start_epoch in LR_SCHEDULE:
if current_epoch >= start_epoch:
learning_rate = initial_learning_rate * mult
else:
break
return learning_rate
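# --- Illustrative sketch (not part of the original file) ----------------------
# Worked example of the schedule above: the linear scaling rule multiplies the
# base rate by batch_size / 128, and each LR_SCHEDULE boundary then scales it by
# the listed multiplier (e.g. 10x smaller from epoch 91 onwards).
def _example_lr_values(batch_size=128):
  """Returns the learning rate at a few representative epochs."""
  return {epoch: learning_rate_schedule(epoch, 0, 0, batch_size)
          for epoch in (0, 90, 91, 136, 182)}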
class LearningRateBatchScheduler(tf.keras.callbacks.Callback):
"""Callback to update learning rate on every batch (not epoch boundaries).
  N.B. Only supports Keras optimizers, not TF optimizers.
Attributes:
schedule: a function that takes an epoch index and a batch index as input
(both integer, indexed from 0) and returns a new learning rate as
output (float).
"""
def __init__(self, schedule, batch_size, steps_per_epoch):
super(LearningRateBatchScheduler, self).__init__()
self.schedule = schedule
self.steps_per_epoch = steps_per_epoch
self.batch_size = batch_size
self.epochs = -1
self.prev_lr = -1
def on_epoch_begin(self, epoch, logs=None):
if not hasattr(self.model.optimizer, 'learning_rate'):
raise ValueError('Optimizer must have a "learning_rate" attribute.')
self.epochs += 1
def on_batch_begin(self, batch, logs=None):
"""Executes before step begins."""
lr = self.schedule(self.epochs,
batch,
self.steps_per_epoch,
self.batch_size)
if not isinstance(lr, (float, np.float32, np.float64)):
raise ValueError('The output of the "schedule" function should be float.')
if lr != self.prev_lr:
self.model.optimizer.learning_rate = lr # lr should be a float here
self.prev_lr = lr
logging.debug(
'Epoch %05d Batch %05d: LearningRateBatchScheduler '
'change learning rate to %s.', self.epochs, batch, lr)
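# --- Illustrative sketch (not part of the original file) ----------------------
# A hypothetical wiring example: the scheduler is attached like any other Keras
# callback, so the learning rate is recomputed at the start of every batch.
def _example_fit_with_lr_callback(model, train_dataset, steps_per_epoch,
                                  batch_size=128, epochs=1):
  """Trains a compiled Keras model with per-batch learning-rate updates."""
  lr_callback = LearningRateBatchScheduler(
      schedule=learning_rate_schedule,
      batch_size=batch_size,
      steps_per_epoch=steps_per_epoch)
  return model.fit(train_dataset,
                   epochs=epochs,
                   steps_per_epoch=steps_per_epoch,
                   callbacks=[lr_callback])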
def run(flags_obj):
"""Run ResNet Cifar-10 training and eval loop using native Keras APIs.
Args:
flags_obj: An object containing parsed flag values.
Raises:
ValueError: If fp16 is passed as it is not currently supported.
Returns:
Dictionary of training and eval stats.
"""
keras_utils.set_session_config(
enable_xla=flags_obj.enable_xla)
# Execute flag override logic for better model performance
if flags_obj.tf_gpu_thread_mode:
keras_utils.set_gpu_thread_mode_and_count(
per_gpu_thread_count=flags_obj.per_gpu_thread_count,
gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
num_gpus=flags_obj.num_gpus,
datasets_num_private_threads=flags_obj.datasets_num_private_threads)
common.set_cudnn_batchnorm_mode()
dtype = flags_core.get_tf_dtype(flags_obj)
if dtype == 'fp16':
raise ValueError('dtype fp16 is not supported in Keras. Use the default '
                     'value (fp32).')
data_format = flags_obj.data_format
if data_format is None:
data_format = ('channels_first' if tf.config.list_physical_devices('GPU')
else 'channels_last')
tf.keras.backend.set_image_data_format(data_format)
strategy = distribution_utils.get_distribution_strategy(
distribution_strategy=flags_obj.distribution_strategy,
num_gpus=flags_obj.num_gpus,
all_reduce_alg=flags_obj.all_reduce_alg,
num_packs=flags_obj.num_packs)
if strategy:
    # flags_obj.enable_get_next_as_optional controls whether to enable the
    # get_next_as_optional behavior in DistributedIterator. If true, the last
    # partial batch can be supported.
strategy.extended.experimental_enable_get_next_as_optional = (
flags_obj.enable_get_next_as_optional
)
strategy_scope = distribution_utils.get_strategy_scope(strategy)
if flags_obj.use_synthetic_data:
synthetic_util.set_up_synthetic_data()
input_fn = common.get_synth_input_fn(
height=cifar_preprocessing.HEIGHT,
width=cifar_preprocessing.WIDTH,
num_channels=cifar_preprocessing.NUM_CHANNELS,
num_classes=cifar_preprocessing.NUM_CLASSES,
dtype=flags_core.get_tf_dtype(flags_obj),
drop_remainder=True)
else:
synthetic_util.undo_set_up_synthetic_data()
input_fn = cifar_preprocessing.input_fn
train_input_dataset = input_fn(
is_training=True,
data_dir=flags_obj.data_dir,
batch_size=flags_obj.batch_size,
parse_record_fn=cifar_preprocessing.parse_record,
datasets_num_private_threads=flags_obj.datasets_num_private_threads,
dtype=dtype,
      # Set drop_remainder to avoid the partial-batch logic in the normalization
      # layer, which triggers tf.where and leads to extra memory copies between
      # host and GPU.
drop_remainder=(not flags_obj.enable_get_next_as_optional))
eval_input_dataset = None
if not flags_obj.skip_eval:
eval_input_dataset = input_fn(
is_training=False,
data_dir=flags_obj.data_dir,
batch_size=flags_obj.batch_size,
parse_record_fn=cifar_preprocessing.parse_record)
steps_per_epoch = (
cifar_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
lr_schedule = 0.1
if flags_obj.use_tensor_lr:
initial_learning_rate = common.BASE_LEARNING_RATE * flags_obj.batch_size / 128
lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
boundaries=list(p[1] * steps_per_epoch for p in LR_SCHEDULE),
values=[initial_learning_rate] +
list(p[0] * initial_learning_rate for p in LR_SCHEDULE))
with strategy_scope:
optimizer = common.get_optimizer(lr_schedule)
model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
train_epochs = flags_obj.train_epochs
callbacks = common.get_callbacks()
if not flags_obj.use_tensor_lr:
lr_callback = LearningRateBatchScheduler(
schedule=learning_rate_schedule,
batch_size=flags_obj.batch_size,
steps_per_epoch=steps_per_epoch)
callbacks.append(lr_callback)
  # If multiple epochs, ignore the train_steps flag.
if train_epochs <= 1 and flags_obj.train_steps:
steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
train_epochs = 1
num_eval_steps = (cifar_preprocessing.NUM_IMAGES['validation'] //
flags_obj.batch_size)
validation_data = eval_input_dataset
if flags_obj.skip_eval:
if flags_obj.set_learning_phase_to_train:
# TODO(haoyuzhang): Understand slowdown of setting learning phase when
# not using distribution strategy.
tf.keras.backend.set_learning_phase(1)
num_eval_steps = None
validation_data = None
if not strategy and flags_obj.explicit_gpu_placement:
# TODO(b/135607227): Add device scope automatically in Keras training loop
    # when not using distribution strategy.
no_dist_strat_device = tf.device('/device:GPU:0')
no_dist_strat_device.__enter__()
history = model.fit(train_input_dataset,
epochs=train_epochs,
steps_per_epoch=steps_per_epoch,
callbacks=callbacks,
validation_steps=num_eval_steps,
validation_data=validation_data,
validation_freq=flags_obj.epochs_between_evals,
verbose=2)
eval_output = None
if not flags_obj.skip_eval:
eval_output = model.evaluate(eval_input_dataset,
steps=num_eval_steps,
verbose=2)
if not strategy and flags_obj.explicit_gpu_placement:
no_dist_strat_device.__exit__()
stats = common.build_stats(history, eval_output, callbacks)
return stats
def define_cifar_flags():
common.define_keras_flags(dynamic_loss_scale=False)
flags_core.set_defaults(data_dir='/tmp/cifar10_data/cifar-10-batches-bin',
model_dir='/tmp/cifar10_model',
epochs_between_evals=10,
batch_size=128)
def main(_):
return run(flags.FLAGS)
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
define_cifar_flags()
app.run(main)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ResNet56 model for Keras adapted from tf.keras.applications.ResNet50.
# Reference:
- [Deep Residual Learning for Image Recognition](
https://arxiv.org/abs/1512.03385)
Adapted from code contributed by BigMoyan.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import tensorflow as tf
from tensorflow.python.keras import backend
from tensorflow.python.keras import initializers
from tensorflow.python.keras import layers
from tensorflow.python.keras import regularizers
BATCH_NORM_DECAY = 0.997
BATCH_NORM_EPSILON = 1e-5
L2_WEIGHT_DECAY = 2e-4
def identity_building_block(input_tensor,
kernel_size,
filters,
stage,
block,
training=None):
"""The identity block is the block that has no conv layer at shortcut.
Arguments:
input_tensor: input tensor
    kernel_size: default 3, the kernel size of
        the conv layers in the main path
    filters: list of integers, the filters of the conv layers in the main path
stage: integer, current stage label, used for generating layer names
block: current block label, used for generating layer names
    training: Only used if training a Keras model with Estimator. In other
      scenarios it is handled automatically.
Returns:
Output tensor for the block.
"""
filters1, filters2 = filters
if backend.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = layers.Conv2D(filters1, kernel_size,
padding='same', use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
name=conv_name_base + '2a')(input_tensor)
x = layers.BatchNormalization(
axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
name=bn_name_base + '2a')(x, training=training)
x = layers.Activation('relu')(x)
x = layers.Conv2D(filters2, kernel_size,
padding='same', use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
name=conv_name_base + '2b')(x)
x = layers.BatchNormalization(
axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
name=bn_name_base + '2b')(x, training=training)
x = layers.add([x, input_tensor])
x = layers.Activation('relu')(x)
return x
def conv_building_block(input_tensor,
kernel_size,
filters,
stage,
block,
strides=(2, 2),
training=None):
"""A block that has a conv layer at shortcut.
Arguments:
input_tensor: input tensor
    kernel_size: default 3, the kernel size of
        the conv layers in the main path
    filters: list of integers, the filters of the conv layers in the main path
stage: integer, current stage label, used for generating layer names
block: current block label, used for generating layer names
strides: Strides for the first conv layer in the block.
    training: Only used if training a Keras model with Estimator. In other
      scenarios it is handled automatically.
Returns:
Output tensor for the block.
  Note that from stage 3, the first conv layer in the main path has
  strides=(2, 2), and the shortcut has strides=(2, 2) as well.
"""
filters1, filters2 = filters
if tf.keras.backend.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = layers.Conv2D(filters1, kernel_size, strides=strides,
padding='same', use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
name=conv_name_base + '2a')(input_tensor)
x = layers.BatchNormalization(
axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
name=bn_name_base + '2a')(x, training=training)
x = layers.Activation('relu')(x)
x = layers.Conv2D(filters2, kernel_size, padding='same', use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
name=conv_name_base + '2b')(x)
x = layers.BatchNormalization(
axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
name=bn_name_base + '2b')(x, training=training)
shortcut = layers.Conv2D(filters2, (1, 1), strides=strides, use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
name=conv_name_base + '1')(input_tensor)
shortcut = layers.BatchNormalization(
axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
name=bn_name_base + '1')(shortcut, training=training)
x = layers.add([x, shortcut])
x = layers.Activation('relu')(x)
return x
def resnet_block(input_tensor,
size,
kernel_size,
filters,
stage,
conv_strides=(2, 2),
training=None):
"""A block which applies conv followed by multiple identity blocks.
Arguments:
input_tensor: input tensor
size: integer, number of constituent conv/identity building blocks.
A conv block is applied once, followed by (size - 1) identity blocks.
    kernel_size: default 3, the kernel size of
        the conv layers in the main path
    filters: list of integers, the filters of the conv layers in the main path
stage: integer, current stage label, used for generating layer names
conv_strides: Strides for the first conv layer in the block.
    training: Only used if training a Keras model with Estimator. In other
      scenarios it is handled automatically.
Returns:
Output tensor after applying conv and identity blocks.
"""
x = conv_building_block(input_tensor, kernel_size, filters, stage=stage,
strides=conv_strides, block='block_0',
training=training)
for i in range(size - 1):
x = identity_building_block(x, kernel_size, filters, stage=stage,
block='block_%d' % (i + 1), training=training)
return x
def resnet(num_blocks, classes=10, training=None):
"""Instantiates the ResNet architecture.
Arguments:
    num_blocks: integer, the number of conv/identity building blocks in each
      stage. The ResNet contains 3 stages, each consisting of one conv block
      followed by (num_blocks - 1) identity blocks. Each conv/identity block
      has 2 convolutional layers. With the input convolutional layer and the
      final dense layer, this brings the total depth of the network to
      (6 * num_blocks + 2).
    classes: optional number of classes to classify images into.
    training: Only used if training a Keras model with Estimator. In other
      scenarios it is handled automatically.
Returns:
A Keras model instance.
"""
input_shape = (32, 32, 3)
img_input = layers.Input(shape=input_shape)
if backend.image_data_format() == 'channels_first':
x = layers.Lambda(lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
name='transpose')(img_input)
bn_axis = 1
  else:  # channels_last
x = img_input
bn_axis = 3
x = layers.ZeroPadding2D(padding=(1, 1), name='conv1_pad')(x)
x = layers.Conv2D(16, (3, 3),
strides=(1, 1),
padding='valid', use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
name='conv1')(x)
x = layers.BatchNormalization(axis=bn_axis,
momentum=BATCH_NORM_DECAY,
epsilon=BATCH_NORM_EPSILON,
name='bn_conv1',)(x, training=training)
x = layers.Activation('relu')(x)
x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[16, 16],
stage=2, conv_strides=(1, 1), training=training)
x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[32, 32],
stage=3, conv_strides=(2, 2), training=training)
x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[64, 64],
stage=4, conv_strides=(2, 2), training=training)
rm_axes = [1, 2] if backend.image_data_format() == 'channels_last' else [2, 3]
x = layers.Lambda(lambda x: backend.mean(x, rm_axes), name='reduce_mean')(x)
x = layers.Dense(classes,
activation='softmax',
kernel_initializer=initializers.RandomNormal(stddev=0.01),
kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
name='fc10')(x)
inputs = img_input
# Create model.
model = tf.keras.models.Model(inputs, x, name='resnet56')
return model
resnet20 = functools.partial(resnet, num_blocks=3)
resnet32 = functools.partial(resnet, num_blocks=5)
resnet56 = functools.partial(resnet, num_blocks=9)
resnet110 = functools.partial(resnet, num_blocks=18)
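# --- Illustrative sketch (not part of the original file) ----------------------
# The depth formula is 6 * num_blocks + 2 (two convs per building block, three
# stages, plus the input conv and the final dense layer), so num_blocks=9 gives
# ResNet-56. A hypothetical smoke test:
def _example_build_resnet56():
  """Builds the CIFAR-10 ResNet-56 and prints its layer summary."""
  model = resnet56(classes=10)
  model.summary()
  return model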
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with ImageNet data on TPU."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import tensorflow as tf
from official.benchmark.models import resnet_imagenet_main
from official.utils.testing import integration
from official.vision.image_classification.resnet import imagenet_preprocessing
class KerasImagenetTest(tf.test.TestCase, parameterized.TestCase):
"""Unit tests for Keras Models with ImageNet."""
_extra_flags_dict = {
"resnet": [
"-batch_size", "4",
"-train_steps", "1",
"-use_synthetic_data", "true"
"-model", "resnet50_v1.5",
"-optimizer", "resnet50_default",
],
"resnet_polynomial_decay": [
"-batch_size", "4",
"-train_steps", "1",
"-use_synthetic_data", "true",
"-model", "resnet50_v1.5",
"-optimizer", "resnet50_default",
"-pruning_method", "polynomial_decay",
],
}
_tempdir = None
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(KerasImagenetTest, cls).setUpClass()
resnet_imagenet_main.define_imagenet_keras_flags()
def setUp(self):
super(KerasImagenetTest, self).setUp()
imagenet_preprocessing.NUM_IMAGES["validation"] = 4
self.policy = \
tf.keras.mixed_precision.experimental.global_policy()
def tearDown(self):
super(KerasImagenetTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
tf.keras.mixed_precision.experimental.set_policy(self.policy)
@parameterized.parameters([
"resnet",
# "resnet_polynomial_decay" b/151854314
])
def test_end_to_end_tpu(self, flags_key):
"""Test Keras model with TPU distribution strategy."""
extra_flags = [
"-distribution_strategy", "tpu",
"-data_format", "channels_last",
"-enable_checkpoint_and_export", "1",
]
extra_flags = extra_flags + self._extra_flags_dict[flags_key]
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags
)
@parameterized.parameters(["resnet"])
def test_end_to_end_tpu_bf16(self, flags_key):
"""Test Keras model with TPU and bfloat16 activation."""
extra_flags = [
"-distribution_strategy", "tpu",
"-data_format", "channels_last",
"-dtype", "bf16",
]
extra_flags = extra_flags + self._extra_flags_dict[flags_key]
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags
)
if __name__ == "__main__":
tf.test.main()