Commit 23c87aaa authored by Abdullah Rashwan, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 314838199
parent 9b219f04
@@ -100,6 +100,28 @@ class PolynomialLrConfig(base_config.Config):
cycle: bool = False
@dataclasses.dataclass
class CosineLrConfig(base_config.Config):
"""Configuration for Cosine learning rate decay.
This class is a container for the cosine learning rate decay configs,
tf.keras.experimental.CosineDecay.
Attributes:
name: The name of the learning rate schedule. Defaults to CosineDecay.
initial_learning_rate: A float. The initial learning rate. Defaults to
None.
decay_steps: A positive integer that is used for decay computation.
Defaults to None.
alpha: A float. Minimum learning rate value as a fraction of
initial_learning_rate.
"""
name: str = 'CosineDecay'
initial_learning_rate: Optional[float] = None
decay_steps: Optional[int] = None
alpha: float = 0.0
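As a rough illustration (not part of this change), the fields above map one-to-one onto the arguments of tf.keras.experimental.CosineDecay; the values below are hypothetical:

import tensorflow as tf

# Hypothetical values standing in for a populated CosineLrConfig.
lr = tf.keras.experimental.CosineDecay(
    initial_learning_rate=0.1,  # CosineLrConfig.initial_learning_rate
    decay_steps=1000,           # CosineLrConfig.decay_steps
    alpha=0.0)                  # CosineLrConfig.alpha
print(float(lr(500)))  # roughly 0.05, half the initial rate at the midpoint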
@dataclasses.dataclass
class LinearWarmupConfig(base_config.Config):
"""Configuration for linear warmup schedule config.
@@ -118,8 +140,23 @@ class LinearWarmupConfig(base_config.Config):
warmup_learning_rate: Initial learning rate for the warmup. Defaults to 0.
warmup_steps: Warmup steps. Defaults to None.
"""
name: str = 'linear'
warmup_learning_rate: float = 0
warmup_steps: Optional[int] = None
@dataclasses.dataclass
class PolynomialWarmupConfig(base_config.Config):
"""Configuration for polynomial warmup schedule config.
This class is a container for the polynomial warmup schedule configs.
Attributes:
name: The name of the warmup schedule. Defaults to 'polynomial'.
power: Polynomial power. Defaults to 1.
warmup_steps: Warmup steps. Defaults to None.
"""
name: str = 'polynomial'
power: float = 1
warmup_steps: Optional[int] = None
@@ -56,11 +56,13 @@ class LrConfig(oneof.OneOfConfig):
stepwise: stepwise learning rate config.
exponential: exponential learning rate config.
polynomial: polynomial learning rate config.
cosine: cosine learning rate config.
"""
type: Optional[str] = None
stepwise: lr_cfg.StepwiseLrConfig = lr_cfg.StepwiseLrConfig()
exponential: lr_cfg.ExponentialLrConfig = lr_cfg.ExponentialLrConfig()
polynomial: lr_cfg.PolynomialLrConfig = lr_cfg.PolynomialLrConfig()
cosine: lr_cfg.CosineLrConfig = lr_cfg.CosineLrConfig()
@dataclasses.dataclass
@@ -70,9 +72,11 @@ class WarmupConfig(oneof.OneOfConfig):
Attributes:
type: 'str', type of warmup schedule to be used, one of the fields below.
linear: linear warmup config.
polynomial: polynomial warmup config.
"""
type: Optional[str] = None
linear: lr_cfg.LinearWarmupConfig = lr_cfg.LinearWarmupConfig()
polynomial: lr_cfg.PolynomialWarmupConfig = lr_cfg.PolynomialWarmupConfig()
@dataclasses.dataclass
...
@@ -80,15 +80,76 @@ class LinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
def get_config(self) -> Mapping[str, Any]:
if isinstance(self._after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
config = {
"after_warmup_lr_sched": self._after_warmup_lr_sched.get_config()}  # pytype: disable=attribute-error
else:
config = {"after_warmup_lr_sched": self._after_warmup_lr_sched}  # pytype: disable=attribute-error
config.update({
"warmup_steps": self._warmup_steps,
"warmup_learning_rate": self._init_warmup_lr,
"name": self._name
})
return config
class PolynomialWarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Applies polynomial warmup schedule on a given learning rate decay schedule.
"""
def __init__(self,
after_warmup_lr_sched: Union[
tf.keras.optimizers.schedules.LearningRateSchedule, float],
warmup_steps: int,
power: float = 1.0,
name: str = "PolynomialWarmup"):
super(PolynomialWarmUp, self).__init__()
if isinstance(after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
self._initial_learning_rate = after_warmup_lr_sched(warmup_steps)
else:
self._initial_learning_rate = tf.cast(
after_warmup_lr_sched, dtype=tf.float32)
self._warmup_steps = warmup_steps
self._power = power
self._after_warmup_lr_sched = after_warmup_lr_sched
self._name = name
def __call__(self, step):
with tf.name_scope(self._name or "PolynomialWarmUp") as name:
# Implements polynomial warmup. i.e., if global_step < warmup_steps, the
# learning rate will be `global_step/num_warmup_steps * init_lr`.
global_step_float = tf.cast(step, tf.float32)
warmup_steps_float = tf.cast(self._warmup_steps, tf.float32)
warmup_percent_done = global_step_float / warmup_steps_float
warmup_learning_rate = (
self._initial_learning_rate *
tf.math.pow(warmup_percent_done, self._power))
if isinstance(self._after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
after_warmup_lr = self._after_warmup_lr_sched(step)
else:
after_warmup_lr = tf.cast(self._after_warmup_lr_sched, dtype=tf.float32)
return tf.cond(
global_step_float < warmup_steps_float,
lambda: warmup_learning_rate,
lambda: after_warmup_lr,
name=name)
def get_config(self) -> Mapping[str, Any]:
if isinstance(self._after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
config = {
"after_warmup_lr_sched": self._after_warmup_lr_sched.get_config()} # pytype: disable=attribute-error
else:
config = {"after_warmup_lr_sched": self._after_warmup_lr_sched} # pytype: disable=attribute-error
config.update({
"warmup_steps": self._warmup_setps,
"power": self._power,
"name": self._name
})
return config
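As a minimal usage sketch (assuming PolynomialWarmUp above is in scope; the numbers mirror the test added further below), the class wraps an existing decay schedule and ramps the rate up polynomially during the first warmup_steps steps:

import tensorflow as tf

# Wrap a stepwise schedule with a quadratic (power=2) warmup over 500 steps.
stepwise = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[10000, 20000], values=[0.1, 0.01, 0.001])
warmed_up = PolynomialWarmUp(
    after_warmup_lr_sched=stepwise, warmup_steps=500, power=2.0)

print(float(warmed_up(250)))   # 0.1 * (250 / 500)**2 = 0.025
print(float(warmed_up(5500)))  # past warmup: falls through to the stepwise value 0.1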
@@ -36,10 +36,12 @@ LR_CLS = {
'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay,
'polynomial': tf.keras.optimizers.schedules.PolynomialDecay,
'exponential': tf.keras.optimizers.schedules.ExponentialDecay,
'cosine': tf.keras.experimental.CosineDecay
}
WARMUP_CLS = {
'linear': lr_schedule.LinearWarmup,
'polynomial': lr_schedule.PolynomialWarmUp
}
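Presumably the factory resolves the oneof 'type' strings through these tables; a hedged sketch of the lookup:

lr_cls = LR_CLS['cosine']              # tf.keras.experimental.CosineDecay
warmup_cls = WARMUP_CLS['polynomial']  # lr_schedule.PolynomialWarmUp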
@@ -108,6 +110,7 @@ class OptimizerFactory(object):
returned.
"""
# TODO(arashwan): Explore if we want to only allow explicit const lr sched.
if not self._lr_config:
lr = self._optimizer_config.learning_rate
else:
...
@@ -209,6 +209,32 @@ class OptimizerFactoryTest(tf.test.TestCase):
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_cosine_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'learning_rate': {
'type': 'cosine',
'cosine': {
'initial_learning_rate': 0.1,
'decay_steps': 1000
}
}
}
expected_lr_step_values = [[0, 0.1],
[250, 0.08535534],
[500, 0.04999999],
[750, 0.01464466],
[1000, 0]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
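For reference, the expected values above follow the cosine decay formula with alpha = 0, lr(step) = initial_lr * 0.5 * (1 + cos(pi * step / decay_steps)); a quick check:

import math
print(0.1 * 0.5 * (1 + math.cos(math.pi * 250 / 1000)))  # ~0.08535534, matches step 250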
def test_constant_lr_with_warmup_schedule(self):
params = {
'optimizer': {
@@ -233,6 +259,38 @@ class OptimizerFactoryTest(tf.test.TestCase):
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_stepwise_lr_with_polynomial_warmup_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]}
},
'warmup': {
'type': 'polynomial',
'polynomial': {'warmup_steps': 500, 'power': 2.}
}
}
expected_lr_step_values = [
[0, 0.0],
[250, 0.025],
[500, 0.1],
[5500, 0.1],
[10000, 0.1],
[10001, 0.01],
[20000, 0.01],
[20001, 0.001]
]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
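The warmup entries above can be checked by hand: during warmup the rate is initial_lr * (step / warmup_steps)**power, where initial_lr is the wrapped stepwise schedule evaluated at warmup_steps (0.1 here):

print(0.1 * (250 / 500) ** 2.0)  # 0.025, matches the entry for step 250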
if __name__ == '__main__':
tf.test.main()