Commit 3d61d6b3 authored by qianyj

initial files for ResNet50

parent d3a70caf
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for lr_schedule."""
from absl.testing import parameterized
import tensorflow as tf
from official.modeling.optimization import lr_schedule
class PowerAndLinearDecayTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
dict(
testcase_name='power_only',
init_lr=1.0,
power=-1.0,
linear_decay_fraction=0.0,
total_decay_steps=100,
offset=0,
expected=[[0, 1.0], [1, 1.0], [40, 1. / 40.], [60, 1. / 60],
[100, 1. / 100]]),
dict(
testcase_name='linear_only',
init_lr=1.0,
power=0.0,
linear_decay_fraction=1.0,
total_decay_steps=100,
offset=0,
expected=[[0, 1.0], [1, 0.99], [40, 0.6], [60, 0.4], [100, 0.0]]),
dict(
testcase_name='general',
init_lr=1.0,
power=-1.0,
linear_decay_fraction=0.5,
total_decay_steps=100,
offset=0,
expected=[[0, 1.0], [1, 1.0], [40, 1. / 40.],
[60, 1. / 60. * 0.8], [100, 0.0]]),
dict(
testcase_name='offset',
init_lr=1.0,
power=-1.0,
linear_decay_fraction=0.5,
total_decay_steps=100,
offset=90,
expected=[[0, 1.0], [90, 1.0], [91, 1.0], [130, 1. / 40.],
[150, 1. / 60. * 0.8], [190, 0.0], [200, 0.0]]),
)
def test_power_linear_lr_schedule(self, init_lr, power, linear_decay_fraction,
total_decay_steps, offset, expected):
lr = lr_schedule.PowerAndLinearDecay(
initial_learning_rate=init_lr,
power=power,
linear_decay_fraction=linear_decay_fraction,
total_decay_steps=total_decay_steps,
offset=offset)
for step, value in expected:
self.assertAlmostEqual(lr(step).numpy(), value)
class OffsetLearningRateTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
dict(class_name=lr_schedule.PiecewiseConstantDecayWithOffset),
dict(class_name=lr_schedule.PolynomialDecayWithOffset),
dict(class_name=lr_schedule.ExponentialDecayWithOffset),
dict(class_name=lr_schedule.CosineDecayWithOffset),
)
def test_generated_docstring(self, class_name):
self.assertNotEmpty(class_name.__init__.__doc__)
@parameterized.parameters(
dict(
class_name=lr_schedule.PiecewiseConstantDecayWithOffset,
kwarg=dict(boundaries=[50, 80], values=[1.0, 0.5, 0.1])),
dict(
class_name=lr_schedule.PolynomialDecayWithOffset,
kwarg=dict(initial_learning_rate=1.0, decay_steps=100)),
dict(
class_name=lr_schedule.ExponentialDecayWithOffset,
kwarg=dict(
initial_learning_rate=1.0, decay_steps=100, decay_rate=0.5)),
dict(
class_name=lr_schedule.CosineDecayWithOffset,
kwarg=dict(initial_learning_rate=1.0, decay_steps=100)),
)
def test_offset(self, class_name, kwarg):
offset = 10
offset_lr = class_name(offset=offset, **kwarg)
base_lr = class_name.base_lr_class(**kwarg)
self.assertIsInstance(offset_lr, class_name)
for step in range(10, 101, 10):
self.assertEqual(offset_lr(step), base_lr(step - offset))
if __name__ == '__main__':
tf.test.main()
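# Illustrative sketch (not part of the original test file): the closed form
# that the PowerAndLinearDecay expectations above appear to encode, written as
# plain Python. The authoritative implementation is
# lr_schedule.PowerAndLinearDecay; this helper is only inferred from the test
# cases, for reference.
def _approx_power_and_linear_decay(step, init_lr, power, linear_decay_fraction,
                                   total_decay_steps, offset=0):
  """Power decay, multiplied by a linear ramp to zero over the final fraction."""
  effective_step = step - offset
  # Power term: the step is clamped at 1 so the schedule starts at init_lr.
  lr = init_lr * max(effective_step, 1)**power
  if linear_decay_fraction > 0:
    # Linear term: ramps from 1 to 0 over the last `linear_decay_fraction`
    # of total_decay_steps, clipped to [0, 1].
    linear_start = total_decay_steps * (1.0 - linear_decay_fraction)
    ramp = 1.0 - (effective_step - linear_start) / (
        total_decay_steps * linear_decay_fraction)
    lr *= min(max(ramp, 0.0), 1.0)
  return lr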
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimizer factory class."""
from typing import Callable, Optional, Union, List, Tuple
import gin
import tensorflow as tf
import tensorflow_addons.optimizers as tfa_optimizers
from official.modeling.optimization import slide_optimizer
from official.modeling.optimization import adafactor_optimizer
from official.modeling.optimization import ema_optimizer
from official.modeling.optimization import lars_optimizer
from official.modeling.optimization import lr_schedule
from official.modeling.optimization.configs import optimization_config as opt_cfg
from official.nlp import optimization as nlp_optimization
OPTIMIZERS_CLS = {
'sgd': tf.keras.optimizers.SGD,
'adam': tf.keras.optimizers.Adam,
'adamw': nlp_optimization.AdamWeightDecay,
'lamb': tfa_optimizers.LAMB,
'rmsprop': tf.keras.optimizers.RMSprop,
'lars': lars_optimizer.LARS,
'adagrad': tf.keras.optimizers.Adagrad,
'slide': slide_optimizer.SLIDE,
'adafactor': adafactor_optimizer.Adafactor,
}
LR_CLS = {
'stepwise': lr_schedule.PiecewiseConstantDecayWithOffset,
'polynomial': lr_schedule.PolynomialDecayWithOffset,
'exponential': lr_schedule.ExponentialDecayWithOffset,
'cosine': lr_schedule.CosineDecayWithOffset,
'power': lr_schedule.DirectPowerDecay,
'power_linear': lr_schedule.PowerAndLinearDecay,
'power_with_offset': lr_schedule.PowerDecayWithOffset,
'step_cosine_with_offset': lr_schedule.StepConsineDecayWithOffset,
}
WARMUP_CLS = {
'linear': lr_schedule.LinearWarmup,
'polynomial': lr_schedule.PolynomialWarmUp
}
def register_optimizer_cls(
key: str, optimizer_config_cls: tf.keras.optimizers.Optimizer):
"""Register customize optimizer cls.
The user will still need to subclass data classes in
configs.optimization_config to be used with OptimizerFactory.
Args:
key: A string key that the optimizer_config_cls is registered with.
optimizer_config_cls: A class that inherits from tf.keras.optimizers.Optimizer.
"""
if key in OPTIMIZERS_CLS:
raise ValueError('%s already registered in OPTIMIZERS_CLS.' % key)
OPTIMIZERS_CLS[key] = optimizer_config_cls
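# Illustrative sketch (not part of the original module): registering a
# project-specific optimizer under a new key. `MyMomentumSGD` is a hypothetical
# subclass; as the docstring above notes, a matching config dataclass in
# configs.optimization_config would still have to be added separately.
def _example_register_custom_optimizer():
  class MyMomentumSGD(tf.keras.optimizers.SGD):
    """Hypothetical custom optimizer used only for this example."""

  register_optimizer_cls('my_momentum_sgd', MyMomentumSGD)
  assert 'my_momentum_sgd' in OPTIMIZERS_CLS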
class OptimizerFactory:
"""Optimizer factory class.
This class builds learning rate and optimizer based on an optimization config.
To use this class, you need to do the following:
(1) Define an optimization config; this includes the optimizer and the
learning rate schedule.
(2) Initialize the class using the optimization config.
(3) Build learning rate.
(4) Build optimizer.
A typical example of using this class:
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'momentum': 0.9}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]}
},
'warmup': {
'type': 'linear',
'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01}
}
}
opt_config = OptimizationConfig(params)
opt_factory = OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
optimizer = opt_factory.build_optimizer(lr)
"""
def __init__(self, config: opt_cfg.OptimizationConfig):
"""Initializing OptimizerFactory.
Args:
config: OptimizationConfig instance contain optimization config.
"""
self._config = config
self._optimizer_config = config.optimizer.get()
self._optimizer_type = config.optimizer.type
self._use_ema = config.ema is not None
self._ema_config = config.ema
if self._optimizer_config is None:
raise ValueError('Optimizer type must be specified')
self._lr_config = config.learning_rate.get()
self._lr_type = config.learning_rate.type
if self._lr_type is None:
raise ValueError('Learning rate type must be specified')
self._warmup_config = config.warmup.get()
self._warmup_type = config.warmup.type
def build_learning_rate(self):
"""Build learning rate.
Builds the learning rate from config. The learning rate schedule is built
according to the learning rate config. If the learning rate type is constant,
lr_config.learning_rate is returned.
Returns:
A tf.keras.optimizers.schedules.LearningRateSchedule instance. If the
learning rate type is constant, lr_config.learning_rate is returned.
"""
if self._lr_type == 'constant':
lr = self._lr_config.learning_rate
else:
lr = LR_CLS[self._lr_type](**self._lr_config.as_dict())
if self._warmup_config:
lr = WARMUP_CLS[self._warmup_type](lr, **self._warmup_config.as_dict())
return lr
@gin.configurable
def build_optimizer(
self,
lr: Union[tf.keras.optimizers.schedules.LearningRateSchedule, float],
gradient_transformers: Optional[List[Callable[
[List[Tuple[tf.Tensor, tf.Tensor]]], List[Tuple[tf.Tensor, tf.Tensor]]
]]] = None,
postprocessor: Optional[Callable[[tf.keras.optimizers.Optimizer],
tf.keras.optimizers.Optimizer]] = None):
"""Build optimizer.
Builds the optimizer from config. It takes the learning rate as input and
builds the optimizer according to the optimizer config. Typically, the
learning rate built using self.build_learning_rate() is passed as an argument
to this method.
Args:
lr: A floating point value, or a
tf.keras.optimizers.schedules.LearningRateSchedule instance.
gradient_transformers: Optional list of functions to use to transform
gradients before applying updates to Variables. The functions are
applied after gradient_aggregator. The functions should accept and
return a list of (gradient, variable) tuples. clipvalue, clipnorm,
global_clipnorm should not be set when gradient_transformers is passed.
postprocessor: An optional function for postprocessing the optimizer. It
takes an optimizer and returns an optimizer.
Returns:
tf.keras.optimizers.Optimizer instance.
"""
optimizer_dict = self._optimizer_config.as_dict()
## Delete clipnorm, clipvalue, global_clipnorm if None
if optimizer_dict['clipnorm'] is None:
del optimizer_dict['clipnorm']
if optimizer_dict['clipvalue'] is None:
del optimizer_dict['clipvalue']
if optimizer_dict['global_clipnorm'] is None:
del optimizer_dict['global_clipnorm']
optimizer_dict['learning_rate'] = lr
if gradient_transformers is not None:
optimizer_dict['gradient_transformers'] = gradient_transformers
optimizer = OPTIMIZERS_CLS[self._optimizer_type](**optimizer_dict)
if self._use_ema:
optimizer = ema_optimizer.ExponentialMovingAverage(
optimizer, **self._ema_config.as_dict())
if postprocessor:
optimizer = postprocessor(optimizer)
assert isinstance(optimizer, tf.keras.optimizers.Optimizer), (
'OptimizerFactory.build_optimizer returning a non-optimizer object: '
'{}'.format(optimizer))
return optimizer
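# Illustrative sketch (not part of the original module): building an optimizer
# with a custom gradient transformer. The params dict mirrors the example in
# the OptimizerFactory docstring; the halving transformer below is an assumed
# example, not something this commit defines.
def _example_build_optimizer_with_transformer():
  params = {
      'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
      'learning_rate': {'type': 'constant', 'constant': {'learning_rate': 0.1}},
  }
  opt_config = opt_cfg.OptimizationConfig(params)
  opt_factory = OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()

  def halve_grads(grads_and_vars):
    # Accepts and returns a list of (gradient, variable) tuples, as required.
    return [(grad * 0.5, var) for grad, var in grads_and_vars]

  return opt_factory.build_optimizer(lr, gradient_transformers=[halve_grads])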
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for optimizer_factory.py."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.modeling.optimization import optimizer_factory
from official.modeling.optimization.configs import optimization_config
class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(('sgd'), ('rmsprop'), ('adam'), ('adamw'), ('lamb'),
('lars'), ('adagrad'))
def test_optimizers(self, optimizer_type):
params = {
'optimizer': {
'type': optimizer_type
},
'learning_rate': {
'type': 'constant',
'constant': {
'learning_rate': 0.1
}
}
}
optimizer_cls = optimizer_factory.OPTIMIZERS_CLS[optimizer_type]
expected_optimizer_config = optimizer_cls().get_config()
expected_optimizer_config['learning_rate'] = 0.1
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
optimizer = opt_factory.build_optimizer(lr, postprocessor=lambda x: x)
self.assertIsInstance(optimizer, optimizer_cls)
self.assertEqual(expected_optimizer_config, optimizer.get_config())
@parameterized.parameters((None, None), (1.0, None), (None, 1.0))
def test_gradient_clipping(self, clipnorm, clipvalue):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'clipnorm': clipnorm,
'clipvalue': clipvalue
}
},
'learning_rate': {
'type': 'constant',
'constant': {
'learning_rate': 1.0
}
}
}
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
optimizer = opt_factory.build_optimizer(lr)
var0 = tf.Variable([1.0, 2.0])
var1 = tf.Variable([3.0, 4.0])
grads0 = tf.constant([0.1, 0.1])
grads1 = tf.constant([2.0, 3.0])
grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
optimizer.apply_gradients(grads_and_vars)
self.assertAllClose(np.array([0.9, 1.9]), var0.numpy())
if clipvalue is not None:
self.assertAllClose(np.array([2.0, 3.0]), var1.numpy())
elif clipnorm is not None:
self.assertAllClose(np.array([2.4452999, 3.1679497]), var1.numpy())
else:
self.assertAllClose(np.array([1.0, 1.0]), var1.numpy())
def test_missing_types(self):
params = {'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}}}
with self.assertRaises(ValueError):
optimizer_factory.OptimizerFactory(
optimization_config.OptimizationConfig(params))
params = {
'learning_rate': {
'type': 'stepwise',
'stepwise': {
'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]
}
}
}
with self.assertRaises(ValueError):
optimizer_factory.OptimizerFactory(
optimization_config.OptimizationConfig(params))
# TODO(b/187559334) refactor lr_schedule tests into `lr_schedule_test.py`.
def test_stepwise_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {
'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]
}
}
}
expected_lr_step_values = [[0, 0.1], [5000, 0.1], [10000, 0.1],
[10001, 0.01], [20000, 0.01], [20001, 0.001]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_stepwise_lr_with_warmup_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {
'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 500,
'warmup_learning_rate': 0.01
}
}
}
expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1], [5500, 0.1],
[10000, 0.1], [10001, 0.01], [20000, 0.01],
[20001, 0.001]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_exponential_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'exponential',
'exponential': {
'initial_learning_rate': 0.1,
'decay_steps': 1000,
'decay_rate': 0.96,
'staircase': True
}
}
}
expected_lr_step_values = [
[0, 0.1],
[999, 0.1],
[1000, 0.096],
[1999, 0.096],
[2000, 0.09216],
]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_polynomial_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'polynomial',
'polynomial': {
'initial_learning_rate': 0.1,
'decay_steps': 1000,
'end_learning_rate': 0.001
}
}
}
expected_lr_step_values = [[0, 0.1], [500, 0.0505], [1000, 0.001]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_cosine_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'cosine',
'cosine': {
'initial_learning_rate': 0.1,
'decay_steps': 1000
}
}
}
expected_lr_step_values = [[0, 0.1], [250, 0.08535534], [500, 0.04999999],
[750, 0.01464466], [1000, 0]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_constant_lr_with_warmup_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'constant',
'constant': {
'learning_rate': 0.1
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 500,
'warmup_learning_rate': 0.01
}
}
}
expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1], [5000, 0.1],
[10000, 0.1], [20000, 0.1]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_stepwise_lr_with_polynomial_warmup_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {
'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]
}
},
'warmup': {
'type': 'polynomial',
'polynomial': {
'warmup_steps': 500,
'power': 2.
}
}
}
expected_lr_step_values = [[0, 0.0], [250, 0.025], [500, 0.1], [5500, 0.1],
[10000, 0.1], [10001, 0.01], [20000, 0.01],
[20001, 0.001]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value, places=6)
def test_power_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'power',
'power': {
'initial_learning_rate': 1.0,
'power': -1.0
}
}
}
expected_lr_step_values = [[0, 1.0], [1, 1.0], [250, 1. / 250.]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_power_linear_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'power_linear',
'power_linear': {
'initial_learning_rate': 1.0,
'power': -1.0,
'linear_decay_fraction': 0.5,
'total_decay_steps': 100,
'offset': 0,
}
}
}
expected_lr_step_values = [[0, 1.0], [1, 1.0], [40, 1. / 40.],
[60, 1. / 60. * 0.8]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_power_with_offset_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'power_with_offset',
'power_with_offset': {
'initial_learning_rate': 1.0,
'power': -1.0,
'offset': 10,
'pre_offset_learning_rate': 3.0,
}
}
}
expected_lr_step_values = [[1, 3.0], [10, 3.0], [20, 1. / 10.]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_step_cosine_lr_schedule_with_warmup(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'step_cosine_with_offset',
'step_cosine_with_offset': {
'values': (0.0001, 0.00005),
'boundaries': (0, 500000),
'offset': 10000,
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 10000,
'warmup_learning_rate': 0.0
}
}
}
expected_lr_step_values = [[0, 0.0], [5000, 1e-4/2.0], [10000, 1e-4],
[20000, 9.994863e-05], [499999, 5e-05]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
class OptimizerFactoryRegistryTest(tf.test.TestCase):
def test_registry(self):
class MyClass():
pass
optimizer_factory.register_optimizer_cls('test', MyClass)
self.assertIn('test', optimizer_factory.OPTIMIZERS_CLS)
with self.assertRaisesRegex(ValueError, 'test already registered.*'):
optimizer_factory.register_optimizer_cls('test', MyClass)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""SLIDE optimizer.
A new optimizer that will be open sourced soon.
"""
SLIDE = "Unimplemented"
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Functions and classes related to training performance."""
import tensorflow as tf
def configure_optimizer(optimizer,
use_float16=False,
use_graph_rewrite=False,
loss_scale=None):
"""Configures optimizer object with performance options."""
if use_float16:
if loss_scale in (None, 'dynamic'):
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
else:
# loss_scale is a number. We interpret that as a fixed loss scale.
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
optimizer, dynamic=False, initial_scale=loss_scale)
if use_graph_rewrite:
# Note: the model dtype must be 'float32', which will ensure
# tf.keras.mixed_precision and enable_mixed_precision_graph_rewrite do not
# double up.
optimizer = (
tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
optimizer))
return optimizer
def set_mixed_precision_policy(dtype, loss_scale=None):
"""Sets the global `tf.keras.mixed_precision.Policy`."""
# TODO(b/191894773): Remove loss_scale argument
assert loss_scale is None, (
'The loss_scale argument must be None. The argument exists for '
'historical reasons and will be removed soon.')
if dtype == tf.float16:
tf.keras.mixed_precision.set_global_policy('mixed_float16')
elif dtype == tf.bfloat16:
tf.keras.mixed_precision.set_global_policy('mixed_bfloat16')
elif dtype == tf.float32:
tf.keras.mixed_precision.set_global_policy('float32')
else:
raise ValueError('Unexpected dtype: %s' % dtype)
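# Illustrative sketch (not part of the original module): a typical float16
# setup using the two helpers above. The SGD optimizer is only a stand-in; any
# tf.keras optimizer could be wrapped the same way.
def _example_mixed_precision_setup():
  set_mixed_precision_policy(tf.float16)
  optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
  # With use_float16=True and loss_scale left as None, the optimizer is
  # wrapped in a dynamic LossScaleOptimizer.
  return configure_optimizer(optimizer, use_float16=True)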
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common TF utilities."""
import six
import tensorflow as tf
from tensorflow.python.util import deprecation
from official.modeling import activations
@deprecation.deprecated(
None,
"tf.keras.layers.Layer supports multiple positional args and kwargs as "
"input tensors. pack/unpack inputs to override __call__ is no longer "
"needed.")
def pack_inputs(inputs):
"""Pack a list of `inputs` tensors to a tuple.
Args:
inputs: a list of tensors.
Returns:
A tuple of tensors. If any input is None, it is replaced with a special
constant tensor.
"""
inputs = tf.nest.flatten(inputs)
outputs = []
for x in inputs:
if x is None:
outputs.append(tf.constant(0, shape=[], dtype=tf.int32))
else:
outputs.append(x)
return tuple(outputs)
@deprecation.deprecated(
None,
"tf.keras.layers.Layer supports multiple positional args and kwargs as "
"input tensors. pack/unpack inputs to override __call__ is no longer "
"needed.")
def unpack_inputs(inputs):
"""unpack a tuple of `inputs` tensors to a tuple.
Args:
inputs: a list of tensors.
Returns:
A tuple of tensors. If any input is a special constant tensor, it is replaced
with None.
"""
inputs = tf.nest.flatten(inputs)
outputs = []
for x in inputs:
if is_special_none_tensor(x):
outputs.append(None)
else:
outputs.append(x)
x = tuple(outputs)
# To keep the very pointless 'unbalanced-tuple-unpacking' pylint check
# from triggering.
if len(x) == 1:
return x[0]
return tuple(outputs)
def is_special_none_tensor(tensor):
"""Checks if a tensor is a special None Tensor."""
return tensor.shape.ndims == 0 and tensor.dtype == tf.int32
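# Illustrative sketch (not part of the original module): round-tripping a list
# containing None through the deprecated pack_inputs/unpack_inputs helpers.
def _example_pack_unpack_round_trip():
  packed = pack_inputs([tf.constant([1.0]), None])
  # The None entry is replaced by a scalar int32 zero tensor.
  assert is_special_none_tensor(packed[1])
  unpacked = unpack_inputs(packed)
  # Unpacking restores the None entry.
  assert unpacked[1] is None
  return unpacked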
def get_activation(identifier, use_keras_layer=False):
"""Maps a identifier to a Python function, e.g., "relu" => `tf.nn.relu`.
It checks string first and if it is one of customized activation not in TF,
the corresponding activation will be returned. For non-customized activation
names and callable identifiers, always fallback to tf.keras.activations.get.
Prefers using keras layers when use_keras_layer=True. Now it only supports
'relu', 'linear', 'identity', 'swish'.
Args:
identifier: String name of the activation function or callable.
use_keras_layer: If True, use keras layer if identifier is allow-listed.
Returns:
A Python function corresponding to the activation function or a keras
activation layer when use_keras_layer=True.
"""
if isinstance(identifier, six.string_types):
identifier = str(identifier).lower()
if use_keras_layer:
keras_layer_allowlist = {
"relu": "relu",
"linear": "linear",
"identity": "linear",
"swish": "swish",
"sigmoid": "sigmoid",
"relu6": tf.nn.relu6,
}
if identifier in keras_layer_allowlist:
return tf.keras.layers.Activation(keras_layer_allowlist[identifier])
name_to_fn = {
"gelu": activations.gelu,
"simple_swish": activations.simple_swish,
"hard_swish": activations.hard_swish,
"relu6": activations.relu6,
"hard_sigmoid": activations.hard_sigmoid,
"identity": activations.identity,
}
if identifier in name_to_fn:
return tf.keras.activations.get(name_to_fn[identifier])
return tf.keras.activations.get(identifier)
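# Illustrative sketch (not part of the original module): the three lookup paths
# of get_activation, using names that appear in the tables above.
def _example_get_activation():
  gelu_fn = get_activation('gelu')  # customized activation from official.modeling
  relu_layer = get_activation('relu', use_keras_layer=True)  # keras Activation layer
  tanh_fn = get_activation('tanh')  # falls back to tf.keras.activations.get
  return gelu_fn, relu_layer, tanh_fn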
def get_shape_list(tensor, expected_rank=None, name=None):
"""Returns a list of the shape of tensor, preferring static dimensions.
Args:
tensor: A tf.Tensor object to find the shape of.
expected_rank: (optional) int. The expected rank of `tensor`. If this is
specified and the `tensor` has a different rank, an exception will be
thrown.
name: Optional name of the tensor for the error message.
Returns:
A list of dimensions of the shape of tensor. All static dimensions will
be returned as python integers, and dynamic dimensions will be returned
as tf.Tensor scalars.
"""
if expected_rank is not None:
assert_rank(tensor, expected_rank, name)
shape = tensor.shape.as_list()
non_static_indexes = []
for (index, dim) in enumerate(shape):
if dim is None:
non_static_indexes.append(index)
if not non_static_indexes:
return shape
dyn_shape = tf.shape(tensor)
for index in non_static_indexes:
shape[index] = dyn_shape[index]
return shape
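# Illustrative sketch (not part of the original module): get_shape_list returns
# Python ints for static dimensions; inside a graph where a dimension is
# unknown, the corresponding entry comes back as a scalar tf.Tensor instead.
def _example_get_shape_list():
  x = tf.ones([4, 8])
  # Fully static shape: plain Python ints come back.
  assert get_shape_list(x, expected_rank=2) == [4, 8]
  return get_shape_list(x)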
def assert_rank(tensor, expected_rank, name=None):
"""Raises an exception if the tensor rank is not of the expected rank.
Args:
tensor: A tf.Tensor to check the rank of.
expected_rank: Python integer or list of integers, expected rank.
name: Optional name of the tensor for the error message.
Raises:
ValueError: If the actual rank does not match the expected rank.
"""
expected_rank_dict = {}
if isinstance(expected_rank, six.integer_types):
expected_rank_dict[expected_rank] = True
else:
for x in expected_rank:
expected_rank_dict[x] = True
actual_rank = tensor.shape.ndims
if actual_rank not in expected_rank_dict:
raise ValueError(
"For the tensor `%s`, the actual tensor rank `%d` (shape = %s) is not "
"equal to the expected tensor rank `%s`" %
(name, actual_rank, str(tensor.shape), str(expected_rank)))
def safe_mean(losses):
"""Computes a safe mean of the losses.
Args:
losses: `Tensor` whose elements contain individual loss measurements.
Returns:
A scalar representing the mean of `losses`. If `losses` has zero elements,
then zero is returned.
"""
total = tf.reduce_sum(losses)
num_elements = tf.cast(tf.size(losses), dtype=losses.dtype)
return tf.math.divide_no_nan(total, num_elements)
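# Illustrative sketch (not part of the original module): safe_mean returns 0
# for an empty tensor instead of NaN, which is the reason it exists.
def _example_safe_mean():
  assert safe_mean(tf.constant([2.0, 4.0])).numpy() == 3.0
  assert safe_mean(tf.constant([], dtype=tf.float32)).numpy() == 0.0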
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Flags which will be nearly universal across models."""
from absl import flags
import tensorflow as tf
from official.utils.flags._conventions import help_wrap
def define_base(data_dir=True,
model_dir=True,
clean=False,
train_epochs=False,
epochs_between_evals=False,
stop_threshold=False,
batch_size=True,
num_gpu=False,
hooks=False,
export_dir=False,
distribution_strategy=False,
run_eagerly=False):
"""Register base flags.
Args:
data_dir: Create a flag for specifying the input data directory.
model_dir: Create a flag for specifying the model file directory.
clean: Create a flag for removing the model_dir.
train_epochs: Create a flag to specify the number of training epochs.
epochs_between_evals: Create a flag to specify the frequency of testing.
stop_threshold: Create a flag to specify a threshold accuracy or other eval
metric which should trigger the end of training.
batch_size: Create a flag to specify the batch size.
num_gpu: Create a flag to specify the number of GPUs used.
hooks: Create a flag to specify hooks for logging.
export_dir: Create a flag to specify where a SavedModel should be exported.
distribution_strategy: Create a flag to specify which Distribution Strategy
to use.
run_eagerly: Create a flag to specify whether to run eagerly, op by op.
Returns:
A list of flags for core.py to mark as key flags.
"""
key_flags = []
if data_dir:
flags.DEFINE_string(
name="data_dir",
short_name="dd",
default="/tmp",
help=help_wrap("The location of the input data."))
key_flags.append("data_dir")
if model_dir:
flags.DEFINE_string(
name="model_dir",
short_name="md",
default="/tmp",
help=help_wrap("The location of the model checkpoint files."))
key_flags.append("model_dir")
if clean:
flags.DEFINE_boolean(
name="clean",
default=False,
help=help_wrap("If set, model_dir will be removed if it exists."))
key_flags.append("clean")
if train_epochs:
flags.DEFINE_integer(
name="train_epochs",
short_name="te",
default=1,
help=help_wrap("The number of epochs used to train."))
key_flags.append("train_epochs")
if epochs_between_evals:
flags.DEFINE_integer(
name="epochs_between_evals",
short_name="ebe",
default=1,
help=help_wrap("The number of training epochs to run between "
"evaluations."))
key_flags.append("epochs_between_evals")
if stop_threshold:
flags.DEFINE_float(
name="stop_threshold",
short_name="st",
default=None,
help=help_wrap("If passed, training will stop at the earlier of "
"train_epochs and when the evaluation metric is "
"greater than or equal to stop_threshold."))
if batch_size:
flags.DEFINE_integer(
name="batch_size",
short_name="bs",
default=32,
help=help_wrap("Batch size for training and evaluation. When using "
"multiple gpus, this is the global batch size for "
"all devices. For example, if the batch size is 32 "
"and there are 4 GPUs, each GPU will get 8 examples on "
"each step."))
key_flags.append("batch_size")
if num_gpu:
flags.DEFINE_integer(
name="num_gpus",
short_name="ng",
default=1,
help=help_wrap("How many GPUs to use at each worker with the "
"DistributionStrategies API. The default is 1."))
if run_eagerly:
flags.DEFINE_boolean(
name="run_eagerly",
default=False,
help="Run the model op by op without building a model function.")
if hooks:
flags.DEFINE_list(
name="hooks",
short_name="hk",
default="LoggingTensorHook",
help=help_wrap(
u"A list of (case insensitive) strings to specify the names of "
u"training hooks. Example: `--hooks ProfilerHook,"
u"ExamplesPerSecondHook`\n See hooks_helper "
u"for details."))
key_flags.append("hooks")
if export_dir:
flags.DEFINE_string(
name="export_dir",
short_name="ed",
default=None,
help=help_wrap("If set, a SavedModel serialization of the model will "
"be exported to this directory at the end of training. "
"See the README for more details and relevant links."))
key_flags.append("export_dir")
if distribution_strategy:
flags.DEFINE_string(
name="distribution_strategy",
short_name="ds",
default="mirrored",
help=help_wrap("The Distribution Strategy to use for training. "
"Accepted values are 'off', 'one_device', "
"'mirrored', 'parameter_server', 'collective', "
"case insensitive. 'off' means not to use "
"Distribution Strategy; 'default' means to choose "
"from `MirroredStrategy` or `OneDeviceStrategy` "
"according to the number of GPUs."))
return key_flags
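# Illustrative sketch (not part of the original module): a caller would
# typically register the base flags once at program start and mark the
# returned names as key flags. The exact wiring here is an assumption, not
# taken from this commit.
def _example_define_base_flags():
  key_flags = define_base(num_gpu=True, distribution_strategy=True)
  for name in key_flags:
    flags.declare_key_flag(name)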
def get_num_gpus(flags_obj):
"""Treat num_gpus=-1 as 'use all'."""
if flags_obj.num_gpus != -1:
return flags_obj.num_gpus
from tensorflow.python.client import device_lib # pylint: disable=g-import-not-at-top
local_device_protos = device_lib.list_local_devices()
return sum([1 for d in local_device_protos if d.device_type == "GPU"])
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Flags for benchmarking models."""
from absl import flags
from official.utils.flags._conventions import help_wrap
def define_log_steps():
flags.DEFINE_integer(
name="log_steps",
default=100,
help="Frequency with which to log timing information with TimeHistory.")
return []
def define_benchmark(benchmark_log_dir=True, bigquery_uploader=True):
"""Register benchmarking flags.
Args:
benchmark_log_dir: Create a flag to specify location for benchmark logging.
bigquery_uploader: Create flags for uploading results to BigQuery.
Returns:
A list of flags for core.py to mark as key flags.
"""
key_flags = []
flags.DEFINE_enum(
name="benchmark_logger_type",
default="BaseBenchmarkLogger",
enum_values=["BaseBenchmarkLogger", "BenchmarkFileLogger"],
help=help_wrap("The type of benchmark logger to use. Defaults to using "
"BaseBenchmarkLogger which logs to STDOUT. Different "
"loggers will require other flags to be able to work."))
flags.DEFINE_string(
name="benchmark_test_id",
short_name="bti",
default=None,
help=help_wrap("The unique test ID of the benchmark run. It could be the "
"combination of key parameters. It is hardware "
"independent and could be used compare the performance "
"between different test runs. This flag is designed for "
"human consumption, and does not have any impact within "
"the system."))
define_log_steps()
if benchmark_log_dir:
flags.DEFINE_string(
name="benchmark_log_dir",
short_name="bld",
default=None,
help=help_wrap("The location of the benchmark logging."))
if bigquery_uploader:
flags.DEFINE_string(
name="gcp_project",
short_name="gp",
default=None,
help=help_wrap(
"The GCP project name where the benchmark will be uploaded."))
flags.DEFINE_string(
name="bigquery_data_set",
short_name="bds",
default="test_benchmark",
help=help_wrap(
"The Bigquery dataset name where the benchmark will be uploaded."))
flags.DEFINE_string(
name="bigquery_run_table",
short_name="brt",
default="benchmark_run",
help=help_wrap("The Bigquery table name where the benchmark run "
"information will be uploaded."))
flags.DEFINE_string(
name="bigquery_run_status_table",
short_name="brst",
default="benchmark_run_status",
help=help_wrap("The Bigquery table name where the benchmark run "
"status information will be uploaded."))
flags.DEFINE_string(
name="bigquery_metric_table",
short_name="bmt",
default="benchmark_metric",
help=help_wrap("The Bigquery table name where the benchmark metric "
"information will be uploaded."))
@flags.multi_flags_validator(
["benchmark_logger_type", "benchmark_log_dir"],
message="--benchmark_logger_type=BenchmarkFileLogger will require "
"--benchmark_log_dir being set")
def _check_benchmark_log_dir(flags_dict):
benchmark_logger_type = flags_dict["benchmark_logger_type"]
if benchmark_logger_type == "BenchmarkFileLogger":
return flags_dict["benchmark_log_dir"]
return True
return key_flags
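# Illustrative sketch (not part of the original module): enabling file-based
# benchmark logging requires both of the flags enforced by the validator
# above. The command line below is a hypothetical invocation.
def _example_benchmark_flags():
  define_benchmark()
  # e.g. run with:
  #   --benchmark_logger_type=BenchmarkFileLogger \
  #   --benchmark_log_dir=/tmp/benchmark_logs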