Commit 23c87aaa authored by Abdullah Rashwan, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 314838199
parent 9b219f04
@@ -100,6 +100,28 @@ class PolynomialLrConfig(base_config.Config):
cycle: bool = False
@dataclasses.dataclass
class CosineLrConfig(base_config.Config):
"""Configuration for Cosine learning rate decay.
This class is a container for the cosine learning rate decay configs,
tf.keras.experimental.CosineDecay.
Attributes:
name: The name of the learning rate schedule. Defaults to CosineDecay.
initial_learning_rate: A float. The initial learning rate. Defaults to
None.
decay_steps: A positive integer that is used for decay computation.
Defaults to None.
alpha: A float. Minimum learning rate value as a fraction of
initial_learning_rate.
"""
name: str = 'CosineDecay'
initial_learning_rate: Optional[float] = None
decay_steps: Optional[int] = None
alpha: float = 0.0
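For illustration (not part of this diff), these fields line up with the tf.keras.experimental.CosineDecay constructor that the factory's LR_CLS registry points at further down; the helper name below is hypothetical:

import tensorflow as tf

# Hypothetical helper: build the Keras schedule straight from a CosineLrConfig.
def build_cosine_decay(cfg):
  return tf.keras.experimental.CosineDecay(
      initial_learning_rate=cfg.initial_learning_rate,  # e.g. 0.1
      decay_steps=cfg.decay_steps,                      # e.g. 1000
      alpha=cfg.alpha)                                  # LR floor as a fraction of the initial LR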
@dataclasses.dataclass
class LinearWarmupConfig(base_config.Config):
"""Configuration for linear warmup schedule config.
@@ -118,8 +140,23 @@ class LinearWarmupConfig(base_config.Config):
warmup_learning_rate: Initial learning rate for the warmup. Defaults to 0.
warmup_steps: Warmup steps. Defaults to None.
"""
- name: str = 'LinearWarmup'
+ name: str = 'linear'
warmup_learning_rate: float = 0
warmup_steps: Optional[int] = None
@dataclasses.dataclass
class PolynomialWarmupConfig(base_config.Config):
"""Configuration for the polynomial warmup schedule.
This class is a container for the polynomial warmup schedule configs.
Attributes:
name: The name of the warmup schedule. Defaults to 'polynomial'.
power: Polynomial power. Defaults to 1.
warmup_steps: Warmup steps. Defaults to None.
"""
name: str = 'polynomial'
power: float = 1
warmup_steps: Optional[int] = None
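For intuition, these fields parameterize the ramp implemented by the PolynomialWarmUp schedule added below: the post-warmup learning rate is scaled by (step / warmup_steps) ** power while step < warmup_steps. A standalone sketch with illustrative values, not the actual implementation:

def polynomial_warmup_lr(step, target_lr, warmup_steps=500, power=2.0):
  """Toy reference for the warmup ramp; target_lr is the post-warmup LR."""
  if step < warmup_steps:
    return target_lr * (step / warmup_steps) ** power
  return target_lr

# polynomial_warmup_lr(250, 0.1) == 0.1 * (250 / 500) ** 2 == 0.025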
@@ -56,11 +56,13 @@ class LrConfig(oneof.OneOfConfig):
stepwise: stepwise learning rate config.
exponential: exponential learning rate config.
polynomial: polynomial learning rate config.
cosine: cosine learning rate config.
"""
type: Optional[str] = None
stepwise: lr_cfg.StepwiseLrConfig = lr_cfg.StepwiseLrConfig()
exponential: lr_cfg.ExponentialLrConfig = lr_cfg.ExponentialLrConfig()
polynomial: lr_cfg.PolynomialLrConfig = lr_cfg.PolynomialLrConfig()
cosine: lr_cfg.CosineLrConfig = lr_cfg.CosineLrConfig()
@dataclasses.dataclass
@@ -70,9 +72,11 @@ class WarmupConfig(oneof.OneOfConfig):
Attributes:
type: 'str', type of warmup schedule to be used, one of the fields below.
linear: linear warmup config.
polynomial: polynomial warmup config.
"""
type: Optional[str] = None
linear: lr_cfg.LinearWarmupConfig = lr_cfg.LinearWarmupConfig()
polynomial: lr_cfg.PolynomialWarmupConfig = lr_cfg.PolynomialWarmupConfig()
@dataclasses.dataclass
@@ -80,15 +80,76 @@ class LinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
def get_config(self) -> Mapping[str, Any]:
if isinstance(self._after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
name = "{!s}WithWarmup".format(self._after_warmup_lr_sched.name) # pytype: disable=attribute-error
config = self._after_warmup_lr_sched.get_config() # pytype: disable=attribute-error
config = {
"after_warmup_lr_sched": self._after_warmup_lr_sched.get_config()} # pytype: disable=attribute-error
else:
name = "ConstantWithWarmup"
config = {"learning_rate": self._after_warmup_lr_sched}
config = {"after_warmup_lr_sched": self._after_warmup_lr_sched} # pytype: disable=attribute-error
config.update({
"warmup_steps": self._warmup_steps,
"warmup_learning_rate": self._init_warmup_lr,
"name": name
"name": self._name
})
return config
class PolynomialWarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Applies polynomial warmup schedule on a given learning rate decay schedule.
"""
def __init__(self,
after_warmup_lr_sched: Union[
tf.keras.optimizers.schedules.LearningRateSchedule, float],
warmup_steps: int,
power: float = 1.0,
name: str = "PolynomialWarmup"):
super(PolynomialWarmUp, self).__init__()
if isinstance(after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
self._initial_learning_rate = after_warmup_lr_sched(warmup_steps)
else:
self._initial_learning_rate = tf.cast(
after_warmup_lr_sched, dtype=tf.float32)
self._warmup_steps = warmup_steps
self._power = power
self._after_warmup_lr_sched = after_warmup_lr_sched
self._name = name
def __call__(self, step):
with tf.name_scope(self._name or "PolynomialWarmUp") as name:
# Implements polynomial warmup, i.e., if global_step < warmup_steps, the
# learning rate will be `(global_step / warmup_steps) ** power * init_lr`.
global_step_float = tf.cast(step, tf.float32)
warmup_steps_float = tf.cast(self._warmup_steps, tf.float32)
warmup_percent_done = global_step_float / warmup_steps_float
warmup_learning_rate = (
self._initial_learning_rate *
tf.math.pow(warmup_percent_done, self._power))
if isinstance(self._after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
after_warmup_lr = self._after_warmup_lr_sched(step)
else:
after_warmup_lr = tf.cast(self._after_warmup_lr_sched, dtype=tf.float32)
return tf.cond(
global_step_float < warmup_steps_float,
lambda: warmup_learning_rate,
lambda: after_warmup_lr,
name=name)
def get_config(self) -> Mapping[str, Any]:
if isinstance(self._after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
config = {
"after_warmup_lr_sched": self._after_warmup_lr_sched.get_config()} # pytype: disable=attribute-error
else:
config = {"after_warmup_lr_sched": self._after_warmup_lr_sched} # pytype: disable=attribute-error
config.update({
"warmup_steps": self._warmup_setps,
"power": self._power,
"name": self._name
})
return config
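A usage sketch for the new schedule (illustrative values, not taken from this change): wrap a decay schedule, or a constant float, and the warmup ramp applies for the first warmup_steps steps:

import tensorflow as tf

decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.1, decay_steps=10000, end_learning_rate=0.0)
lr = PolynomialWarmUp(after_warmup_lr_sched=decay, warmup_steps=500, power=2.0)
# lr(250) == decay(500) * (250 / 500) ** 2, ramping toward the post-warmup value;
# lr(5000) simply returns decay(5000).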
@@ -36,10 +36,12 @@ LR_CLS = {
'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay,
'polynomial': tf.keras.optimizers.schedules.PolynomialDecay,
'exponential': tf.keras.optimizers.schedules.ExponentialDecay,
'cosine': tf.keras.experimental.CosineDecay
}
WARMUP_CLS = {
- 'linear': lr_schedule.LinearWarmup
+ 'linear': lr_schedule.LinearWarmup,
'polynomial': lr_schedule.PolynomialWarmUp
}
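These registries key schedule classes by the config 'type' strings; a hedged sketch of the kind of lookup they enable (the full factory wiring is only partially shown in this diff, so the kwargs here are just the oneof config fields spelled out by hand):

lr_sched = LR_CLS['cosine'](initial_learning_rate=0.1, decay_steps=1000)
warmup_sched = WARMUP_CLS['polynomial'](
    after_warmup_lr_sched=lr_sched, warmup_steps=500, power=2.0)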
@@ -108,6 +110,7 @@ class OptimizerFactory(object):
returned.
"""
# TODO(arashwan): Explore if we want to only allow explicit const lr sched.
if not self._lr_config:
lr = self._optimizer_config.learning_rate
else:
@@ -209,6 +209,32 @@ class OptimizerFactoryTest(tf.test.TestCase):
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_cosine_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'learning_rate': {
'type': 'cosine',
'cosine': {
'initial_learning_rate': 0.1,
'decay_steps': 1000
}
}
}
expected_lr_step_values = [[0, 0.1],
[250, 0.08535534],
[500, 0.04999999],
[750, 0.01464466],
[1000, 0]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
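The expected values above follow from the cosine decay formula, lr(step) = initial_lr * (alpha + (1 - alpha) * 0.5 * (1 + cos(pi * step / decay_steps))), here with alpha = 0; a quick hand check:

import math

def cosine_lr(step, initial_lr=0.1, decay_steps=1000, alpha=0.0):
  frac = 0.5 * (1 + math.cos(math.pi * step / decay_steps))
  return initial_lr * ((1 - alpha) * frac + alpha)

# cosine_lr(250) ~= 0.08535534, cosine_lr(500) ~= 0.05, cosine_lr(1000) ~= 0.0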
def test_constant_lr_with_warmup_schedule(self):
params = {
'optimizer': {
@@ -233,6 +259,38 @@ class OptimizerFactoryTest(tf.test.TestCase):
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_stepwise_lr_with_polynomial_warmup_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]}
},
'warmup': {
'type': 'polynomial',
'polynomial': {'warmup_steps': 500, 'power': 2.}
}
}
expected_lr_step_values = [
[0, 0.0],
[250, 0.025],
[500, 0.1],
[5500, 0.1],
[10000, 0.1],
[10001, 0.01],
[20000, 0.01],
[20001, 0.001]
]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
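The warmup portion of the expected values follows the same (step / warmup_steps) ** power ramp toward the first stepwise value: at step 250 that is 0.1 * (250 / 500) ** 2 = 0.025; from step 500 onward the stepwise schedule applies. A tiny hand check:

# Sanity check of the warmup half of expected_lr_step_values above.
assert abs(0.1 * (250 / 500) ** 2 - 0.025) < 1e-9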
if __name__ == '__main__':
tf.test.main()