Commit 7ee6089e authored by Yeqing Li's avatar Yeqing Li Committed by A. Unique TensorFlower
Browse files

Adds the offset argument to the supported learning rate.

PiperOrigin-RevId: 381301573
parent 169e4051
......@@ -56,10 +56,12 @@ class StepwiseLrConfig(base_config.Config):
values[0] [boundaries[0], boundaries[1]] -> values[1]
[boundaries[n-1], boundaries[n]] -> values[n] [boundaries[n],
end] -> values[n+1] Defaults to None.
offset: An int. The offset applied to steps. Defaults to 0.
"""
name: str = 'PiecewiseConstantDecay'
boundaries: Optional[List[int]] = None
values: Optional[List[float]] = None
offset: int = 0
@dataclasses.dataclass
......@@ -76,12 +78,14 @@ class ExponentialLrConfig(base_config.Config):
decay_rate: A float. Defaults to None.
staircase: A boolean, if true, learning rate is decreased at discreate
intervals. Defaults to False.
offset: An int. The offset applied to steps. Defaults to 0.
"""
name: str = 'ExponentialDecay'
initial_learning_rate: Optional[float] = None
decay_steps: Optional[int] = None
decay_rate: Optional[float] = None
staircase: Optional[bool] = None
offset: int = 0
@dataclasses.dataclass
......@@ -99,6 +103,7 @@ class PolynomialLrConfig(base_config.Config):
power: A float. The power of the polynomial. Defaults to linear, 1.0.
cycle: A boolean, whether or not it should cycle beyond decay_steps.
Defaults to False.
offset: An int. The offset applied to steps. Defaults to 0.
"""
name: str = 'PolynomialDecay'
initial_learning_rate: Optional[float] = None
......@@ -106,6 +111,7 @@ class PolynomialLrConfig(base_config.Config):
end_learning_rate: float = 0.0001
power: float = 1.0
cycle: bool = False
offset: int = 0
@dataclasses.dataclass
......@@ -122,11 +128,13 @@ class CosineLrConfig(base_config.Config):
to None.
alpha: A float. Minimum learning rate value as a fraction of
initial_learning_rate.
offset: An int. The offset applied to steps. Defaults to 0.
"""
name: str = 'CosineDecay'
initial_learning_rate: Optional[float] = None
decay_steps: Optional[int] = None
alpha: float = 0.0
offset: int = 0
@dataclasses.dataclass
......
......@@ -19,6 +19,75 @@ from typing import Mapping, Any, Union, Optional
import tensorflow as tf
def _make_offset_wrapper(new_class_name: str, base_lr_class):
"""Generates a offset wrapper of learning rate schedule.
It will returns a subclass of the the `base_lr_class`, the subclass takes an
`offset` argument in the constructor. When the new class instance is called,
the behavior is:
new_class_object(step) = base_lr_class_object(step - offset)
Example:
CosineDecayWithOffset = _make_offset_wrapper(
'CosineDecayWithOffset', tf.keras.experimental.CosineDecay)
# Use the lr:
lr = CosineDecayWithOffset(offset=100, initial_learning_rate=0.1,
decay_steps=1000)
lr(101) # equals to tf.keras.experimental.CosineDecay(...)(101-100)
Args:
new_class_name: the name of the new class.
base_lr_class: the base learning rate schedule class. Should be subclass of
tf.keras.optimizers.schedules.LearningRateSchedule
Returns:
A new class (subclass of the base_lr_class) that can take an offset.
"""
assert issubclass(base_lr_class,
tf.keras.optimizers.schedules.LearningRateSchedule), (
"base_lr_class should be subclass of keras "
f"LearningRateSchedule, got {base_lr_class}")
# pylint: disable=protected-access,pointless-statement
def offset_learning_rate_init(self, offset=0, **kwargs):
"""Construct learning rate schedule object.
When this object is called, its behavior is
self.__call__(step) == base_lr_class.__call__(step - offset)
Args:
self: this object.
offset: The offset when computing the learning rate schedule.
**kwargs: Pass through to base learning rate class constructor.
"""
base_lr_class.__init__(self, **kwargs)
self._offset = offset
def offset_learning_rate_call(self, step):
step = tf.cast(step - self._offset, tf.float32)
return base_lr_class.__call__(self, step)
# pylint: enable=protected-access,pointless-statement
return type(
new_class_name, (base_lr_class,), {
"base_lr_class": base_lr_class,
"__init__": offset_learning_rate_init,
"__call__": offset_learning_rate_call
})
PiecewiseConstantDecayWithOffset = _make_offset_wrapper(
"PiecewiseConstantDecayWithOffset",
tf.keras.optimizers.schedules.PiecewiseConstantDecay)
PolynomialDecayWithOffset = _make_offset_wrapper(
"PolynomialDecayWithOffset", tf.keras.optimizers.schedules.PolynomialDecay)
ExponentialDecayWithOffset = _make_offset_wrapper(
"ExponentialDecayWithOffset",
tf.keras.optimizers.schedules.ExponentialDecay)
CosineDecayWithOffset = _make_offset_wrapper("CosineDecayWithOffset",
tf.keras.experimental.CosineDecay)
class LinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Linear warmup schedule."""
......
......@@ -70,5 +70,40 @@ class PowerAndLinearDecayTest(tf.test.TestCase, parameterized.TestCase):
self.assertAlmostEqual(lr(step).numpy(), value)
class OffsetLearningRateTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
dict(class_name=lr_schedule.PiecewiseConstantDecayWithOffset),
dict(class_name=lr_schedule.PolynomialDecayWithOffset),
dict(class_name=lr_schedule.ExponentialDecayWithOffset),
dict(class_name=lr_schedule.CosineDecayWithOffset),
)
def test_generated_docstring(self, class_name):
self.assertNotEmpty(class_name.__init__.__doc__)
@parameterized.parameters(
dict(
class_name=lr_schedule.PiecewiseConstantDecayWithOffset,
kwarg=dict(boundaries=[50, 80], values=[1.0, 0.5, 0.1])),
dict(
class_name=lr_schedule.PolynomialDecayWithOffset,
kwarg=dict(initial_learning_rate=1.0, decay_steps=100)),
dict(
class_name=lr_schedule.ExponentialDecayWithOffset,
kwarg=dict(
initial_learning_rate=1.0, decay_steps=100, decay_rate=0.5)),
dict(
class_name=lr_schedule.CosineDecayWithOffset,
kwarg=dict(initial_learning_rate=1.0, decay_steps=100)),
)
def test_offset(self, class_name, kwarg):
offset = 10
offset_lr = class_name(offset=offset, **kwarg)
base_lr = class_name.base_lr_class(**kwarg)
self.assertIsInstance(offset_lr, class_name)
for step in range(10, 101, 10):
self.assertEqual(offset_lr(step), base_lr(step - offset))
if __name__ == '__main__':
tf.test.main()
......@@ -38,10 +38,10 @@ OPTIMIZERS_CLS = {
}
LR_CLS = {
'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay,
'polynomial': tf.keras.optimizers.schedules.PolynomialDecay,
'exponential': tf.keras.optimizers.schedules.ExponentialDecay,
'cosine': tf.keras.experimental.CosineDecay,
'stepwise': lr_schedule.PiecewiseConstantDecayWithOffset,
'polynomial': lr_schedule.PolynomialDecayWithOffset,
'exponential': lr_schedule.ExponentialDecayWithOffset,
'cosine': lr_schedule.CosineDecayWithOffset,
'power': lr_schedule.DirectPowerDecay,
'power_linear': lr_schedule.PowerAndLinearDecay,
'power_with_offset': lr_schedule.PowerDecayWithOffset,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment