Commit 1ebff962 authored by Hongkun Yu, committed by A. Unique TensorFlower

Add new lr schedule: lr * (step)^power

PiperOrigin-RevId: 328677732
parent ed38358f
...@@ -130,6 +130,22 @@ class CosineLrConfig(base_config.Config):
  alpha: float = 0.0


@dataclasses.dataclass
class DirectPowerLrConfig(base_config.Config):
  """Configuration for DirectPower learning rate decay.

  This class configures a schedule that follows lr * (step)^power.

  Attributes:
    name: The name of the learning rate schedule. Defaults to DirectPowerDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    power: A float. Defaults to -0.5, which gives inverse square-root decay.
  """
  name: str = 'DirectPowerDecay'
  initial_learning_rate: Optional[float] = None
  power: float = -0.5


@dataclasses.dataclass
class LinearWarmupConfig(base_config.Config):
  """Configuration for linear warmup schedule config.
......
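For illustration, a minimal sketch of constructing the new config directly. This is hedged: the import path and the as_dict() call are assumptions based on the Model Garden layout and are not part of this diff; only the field names come from the dataclass above.

# Hedged sketch, not part of this commit: the import path below is assumed.
from official.modeling.optimization.configs import learning_rate_config as lr_cfg

# power defaults to -0.5, i.e. the learning rate decays as 1/sqrt(step).
power_lr = lr_cfg.DirectPowerLrConfig(initial_learning_rate=2e-4, power=-0.5)
print(power_lr.as_dict())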
...@@ -60,6 +60,7 @@ class LrConfig(oneof.OneOfConfig):
    exponential: exponential learning rate config.
    polynomial: polynomial learning rate config.
    cosine: cosine learning rate config.
    power: step^power learning rate config.
  """
  type: Optional[str] = None
  constant: lr_cfg.ConstantLrConfig = lr_cfg.ConstantLrConfig()
...@@ -67,6 +68,7 @@ class LrConfig(oneof.OneOfConfig):
  exponential: lr_cfg.ExponentialLrConfig = lr_cfg.ExponentialLrConfig()
  polynomial: lr_cfg.PolynomialLrConfig = lr_cfg.PolynomialLrConfig()
  cosine: lr_cfg.CosineLrConfig = lr_cfg.CosineLrConfig()
  power: lr_cfg.DirectPowerLrConfig = lr_cfg.DirectPowerLrConfig()

@dataclasses.dataclass
......
...@@ -153,3 +153,38 @@ class PolynomialWarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
        "name": self._name
    })
    return config


class DirectPowerDecay(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Learning rate schedule that follows lr * (step)^power."""

  def __init__(self,
               initial_learning_rate: float,
               power: float = 1.0,
               name: str = "DirectPowerDecay"):
    """Initialize configuration of the learning rate schedule.

    Args:
      initial_learning_rate: A float, the initial learning rate.
      power: A float, the exponent applied to the step count.
      name: Optional, name of the learning rate schedule.
    """
    super(DirectPowerDecay, self).__init__()
    self._initial_learning_rate = initial_learning_rate
    self._power = power
    self._name = name

  def __call__(self, step):
    with tf.name_scope(self._name or "DirectPowerDecay"):
      step = tf.cast(step, tf.float32)
      learning_rate = self._initial_learning_rate
      learning_rate *= tf.math.pow(step, self._power)
      return learning_rate

  def get_config(self):
    """Get the configuration of the learning rate schedule."""
    return {
        "initial_learning_rate": self._initial_learning_rate,
        "power": self._power,
        "name": self._name,
    }
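As a sanity check on the arithmetic, here is a tiny standalone sketch (plain Python, not the library API; the helper name direct_power_lr is made up for illustration) that mirrors the lr * (step)^power rule implemented above:

def direct_power_lr(step, initial_learning_rate=1.0, power=-0.5):
  # Mirrors the schedule above: scale the initial rate by step**power.
  return initial_learning_rate * (float(step) ** power)

# With power=-0.5 this is inverse square-root decay:
# step 1 -> 1.0, step 4 -> 0.5, step 100 -> 0.1.
print([direct_power_lr(s) for s in (1, 4, 100)])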
...@@ -36,7 +36,8 @@ LR_CLS = {
    'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay,
    'polynomial': tf.keras.optimizers.schedules.PolynomialDecay,
    'exponential': tf.keras.optimizers.schedules.ExponentialDecay,
    'cosine': tf.keras.experimental.CosineDecay,
    'power': lr_schedule.DirectPowerDecay,
}
WARMUP_CLS = {
......
...@@ -274,6 +274,30 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_power_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'power',
            'power': {
                'initial_learning_rate': 1.0,
                'power': -1.0
            }
        }
    }
    expected_lr_step_values = [[1, 1.0], [250, 1. / 250.]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()
    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

if __name__ == '__main__':
  tf.test.main()