ModelZoo / ResNet50_tensorflow · Commit 5c96ad96

Adds the offset argument to the supported learning rate.

PiperOrigin-RevId: 381301573

Authored Jun 24, 2021 by Yeqing Li; committed by A. Unique TensorFlower, Jun 24, 2021.
Parent: af924a4c

Showing 4 changed files with 116 additions and 4 deletions (+116 −4).
official/modeling/optimization/configs/learning_rate_config.py  +8 −0
official/modeling/optimization/lr_schedule.py  +69 −0
official/modeling/optimization/lr_schedule_test.py  +35 −0
official/modeling/optimization/optimizer_factory.py  +4 −4
official/modeling/optimization/configs/learning_rate_config.py (view file @ 5c96ad96)

@@ -56,10 +56,12 @@ class StepwiseLrConfig(base_config.Config):
       values[0] [boundaries[0], boundaries[1]] -> values[1]
       [boundaries[n-1], boundaries[n]] -> values[n] [boundaries[n],
       end] -> values[n+1] Defaults to None.
+    offset: An int. The offset applied to steps. Defaults to 0.
   """
   name: str = 'PiecewiseConstantDecay'
   boundaries: Optional[List[int]] = None
   values: Optional[List[float]] = None
+  offset: int = 0


 @dataclasses.dataclass
@@ -76,12 +78,14 @@ class ExponentialLrConfig(base_config.Config):
     decay_rate: A float. Defaults to None.
     staircase: A boolean; if true, the learning rate is decreased at discrete
       intervals. Defaults to False.
+    offset: An int. The offset applied to steps. Defaults to 0.
   """
   name: str = 'ExponentialDecay'
   initial_learning_rate: Optional[float] = None
   decay_steps: Optional[int] = None
   decay_rate: Optional[float] = None
   staircase: Optional[bool] = None
+  offset: int = 0


 @dataclasses.dataclass
@@ -99,6 +103,7 @@ class PolynomialLrConfig(base_config.Config):
     power: A float. The power of the polynomial. Defaults to linear, 1.0.
     cycle: A boolean, whether or not it should cycle beyond decay_steps.
       Defaults to False.
+    offset: An int. The offset applied to steps. Defaults to 0.
   """
   name: str = 'PolynomialDecay'
   initial_learning_rate: Optional[float] = None
@@ -106,6 +111,7 @@ class PolynomialLrConfig(base_config.Config):
   end_learning_rate: float = 0.0001
   power: float = 1.0
   cycle: bool = False
+  offset: int = 0


 @dataclasses.dataclass
@@ -122,11 +128,13 @@ class CosineLrConfig(base_config.Config):
       to None.
     alpha: A float. Minimum learning rate value as a fraction of
       initial_learning_rate.
+    offset: An int. The offset applied to steps. Defaults to 0.
   """
   name: str = 'CosineDecay'
   initial_learning_rate: Optional[float] = None
   decay_steps: Optional[int] = None
   alpha: float = 0.0
+  offset: int = 0


 @dataclasses.dataclass
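For context, here is how the new field might be used once these configs land; a minimal sketch, assuming a setup where the first 500 steps are covered by a separate warmup schedule (the trainer wiring itself is not part of this diff):

    # Sketch: delay cosine decay until a 500-step warmup has finished.
    from official.modeling.optimization.configs import learning_rate_config

    lr_config = learning_rate_config.CosineLrConfig(
        initial_learning_rate=0.1,
        decay_steps=10000,
        alpha=0.0,
        offset=500,  # decay is computed on (step - 500)
    )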
official/modeling/optimization/lr_schedule.py (view file @ 5c96ad96)

@@ -19,6 +19,75 @@ from typing import Mapping, Any, Union, Optional
 import tensorflow as tf


+def _make_offset_wrapper(new_class_name: str, base_lr_class):
+  """Generates an offset wrapper of a learning rate schedule.
+
+  It returns a subclass of `base_lr_class` that takes an `offset` argument in
+  its constructor. When the new class instance is called, the behavior is:
+    new_class_object(step) = base_lr_class_object(step - offset)
+
+  Example:
+    CosineDecayWithOffset = _make_offset_wrapper(
+        'CosineDecayWithOffset', tf.keras.experimental.CosineDecay)
+    # Use the lr:
+    lr = CosineDecayWithOffset(offset=100, initial_learning_rate=0.1,
+                               decay_steps=1000)
+    lr(101)  # equals tf.keras.experimental.CosineDecay(...)(101 - 100)
+
+  Args:
+    new_class_name: the name of the new class.
+    base_lr_class: the base learning rate schedule class. Should be a subclass
+      of tf.keras.optimizers.schedules.LearningRateSchedule.
+
+  Returns:
+    A new class (subclass of the base_lr_class) that can take an offset.
+  """
+  assert issubclass(base_lr_class,
+                    tf.keras.optimizers.schedules.LearningRateSchedule), (
+                        "base_lr_class should be subclass of keras "
+                        f"LearningRateSchedule, got {base_lr_class}")
+
+  # pylint: disable=protected-access,pointless-statement
+  def offset_learning_rate_init(self, offset=0, **kwargs):
+    """Construct learning rate schedule object.
+
+    When this object is called, its behavior is
+      self.__call__(step) == base_lr_class.__call__(step - offset)
+
+    Args:
+      self: this object.
+      offset: The offset when computing the learning rate schedule.
+      **kwargs: Pass through to base learning rate class constructor.
+    """
+    base_lr_class.__init__(self, **kwargs)
+    self._offset = offset
+
+  def offset_learning_rate_call(self, step):
+    step = tf.cast(step - self._offset, tf.float32)
+    return base_lr_class.__call__(self, step)
+
+  # pylint: enable=protected-access,pointless-statement
+  return type(
+      new_class_name, (base_lr_class,), {
+          "base_lr_class": base_lr_class,
+          "__init__": offset_learning_rate_init,
+          "__call__": offset_learning_rate_call
+      })
+
+
+PiecewiseConstantDecayWithOffset = _make_offset_wrapper(
+    "PiecewiseConstantDecayWithOffset",
+    tf.keras.optimizers.schedules.PiecewiseConstantDecay)
+PolynomialDecayWithOffset = _make_offset_wrapper(
+    "PolynomialDecayWithOffset", tf.keras.optimizers.schedules.PolynomialDecay)
+ExponentialDecayWithOffset = _make_offset_wrapper(
+    "ExponentialDecayWithOffset",
+    tf.keras.optimizers.schedules.ExponentialDecay)
+CosineDecayWithOffset = _make_offset_wrapper("CosineDecayWithOffset",
+                                             tf.keras.experimental.CosineDecay)
+
+
 class LinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
   """Linear warmup schedule."""
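The generated wrappers can be exercised directly; a minimal sketch of the `step - offset` shift, assuming eager execution and the module path used elsewhere in this commit:

    import tensorflow as tf
    from official.modeling.optimization import lr_schedule

    lr = lr_schedule.CosineDecayWithOffset(
        offset=100, initial_learning_rate=0.1, decay_steps=1000)
    base = tf.keras.experimental.CosineDecay(
        initial_learning_rate=0.1, decay_steps=1000)

    # The wrapped schedule at `step` equals the base schedule at `step - offset`.
    assert lr(600).numpy() == base(500).numpy()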
official/modeling/optimization/lr_schedule_test.py (view file @ 5c96ad96)

@@ -70,5 +70,40 @@ class PowerAndLinearDecayTest(tf.test.TestCase, parameterized.TestCase):
       self.assertAlmostEqual(lr(step).numpy(), value)


+class OffsetLearningRateTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.parameters(
+      dict(class_name=lr_schedule.PiecewiseConstantDecayWithOffset),
+      dict(class_name=lr_schedule.PolynomialDecayWithOffset),
+      dict(class_name=lr_schedule.ExponentialDecayWithOffset),
+      dict(class_name=lr_schedule.CosineDecayWithOffset),
+  )
+  def test_generated_docstring(self, class_name):
+    self.assertNotEmpty(class_name.__init__.__doc__)
+
+  @parameterized.parameters(
+      dict(
+          class_name=lr_schedule.PiecewiseConstantDecayWithOffset,
+          kwarg=dict(boundaries=[50, 80], values=[1.0, 0.5, 0.1])),
+      dict(
+          class_name=lr_schedule.PolynomialDecayWithOffset,
+          kwarg=dict(initial_learning_rate=1.0, decay_steps=100)),
+      dict(
+          class_name=lr_schedule.ExponentialDecayWithOffset,
+          kwarg=dict(
+              initial_learning_rate=1.0, decay_steps=100, decay_rate=0.5)),
+      dict(
+          class_name=lr_schedule.CosineDecayWithOffset,
+          kwarg=dict(initial_learning_rate=1.0, decay_steps=100)),
+  )
+  def test_offset(self, class_name, kwarg):
+    offset = 10
+    offset_lr = class_name(offset=offset, **kwarg)
+    base_lr = class_name.base_lr_class(**kwarg)
+    self.assertIsInstance(offset_lr, class_name)
+    for step in range(10, 101, 10):
+      self.assertEqual(offset_lr(step), base_lr(step - offset))
+
+
 if __name__ == '__main__':
   tf.test.main()
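Note that `test_offset` reaches the original schedule through the `base_lr_class` attribute the wrapper factory attaches to each generated class; a small sketch of the same comparison outside the test harness (step values are illustrative):

    from official.modeling.optimization import lr_schedule

    lr = lr_schedule.PiecewiseConstantDecayWithOffset(
        offset=10, boundaries=[50, 80], values=[1.0, 0.5, 0.1])
    base = lr_schedule.PiecewiseConstantDecayWithOffset.base_lr_class(
        boundaries=[50, 80], values=[1.0, 0.5, 0.1])

    # Boundaries effectively shift right by the offset.
    assert lr(60).numpy() == base(50).numpy()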
official/modeling/optimization/optimizer_factory.py (view file @ 5c96ad96)

@@ -38,10 +38,10 @@ OPTIMIZERS_CLS = {
 }

 LR_CLS = {
-    'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay,
-    'polynomial': tf.keras.optimizers.schedules.PolynomialDecay,
-    'exponential': tf.keras.optimizers.schedules.ExponentialDecay,
-    'cosine': tf.keras.experimental.CosineDecay,
+    'stepwise': lr_schedule.PiecewiseConstantDecayWithOffset,
+    'polynomial': lr_schedule.PolynomialDecayWithOffset,
+    'exponential': lr_schedule.ExponentialDecayWithOffset,
+    'cosine': lr_schedule.CosineDecayWithOffset,
     'power': lr_schedule.DirectPowerDecay,
     'power_linear': lr_schedule.PowerAndLinearDecay,
     'power_with_offset': lr_schedule.PowerDecayWithOffset,
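Downstream, a config's schedule-type string now resolves to the offset-aware class; a minimal sketch of the lookup (the factory's full build path is outside this diff, so the surrounding code is an assumption):

    from official.modeling.optimization import lr_schedule

    LR_CLS = {'cosine': lr_schedule.CosineDecayWithOffset}
    lr = LR_CLS['cosine'](offset=500, initial_learning_rate=0.1,
                          decay_steps=10000)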