Commit 87ec3d2a authored by Abdullah Rashwan's avatar Abdullah Rashwan Committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 314002442
parent 4ce55184
"""Optimization package definition."""
# pylint: disable=wildcard-import
from official.modeling.optimization.configs.learning_rate_config import *
from official.modeling.optimization.configs.optimization_config import *
from official.modeling.optimization.configs.optimizer_config import *
from official.modeling.optimization.optimizer_factory import OptimizerFactory
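A brief usage sketch, assuming the wildcard imports above re-export the config dataclasses defined later in this commit:

```python
# Sketch: the public API re-exported by this package __init__.
from official.modeling.optimization import OptimizerFactory
from official.modeling.optimization import OptimizationConfig  # via wildcard import

config = OptimizationConfig({'optimizer': {'type': 'sgd'}})
factory = OptimizerFactory(config)
```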
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Dataclasses for learning rate schedule config."""
from typing import List, Optional
import dataclasses
from official.modeling.hyperparams import base_config
@dataclasses.dataclass
class StepwiseLrConfig(base_config.Config):
"""Configuration for stepwise learning rate decay.
This class is a container for the piecewise constant learning rate scheduling
configs. It will configure an instance of the PiecewiseConstantDecay Keras
learning rate schedule.
An example (from the Keras docs): use a learning rate that's 1.0 for the first
100001 steps, 0.5 for the next 10000 steps, and 0.1 for any additional steps.
```python
boundaries: [100000, 110000]
values: [1.0, 0.5, 0.1]
```
Attributes:
name: The name of the learning rate schedule. Defaults to
PiecewiseConstantDecay.
boundaries: A list of ints of strictly increasing entries.
Defaults to None.
values: A list of floats that specifies the values for the intervals defined
by `boundaries`. It should have one more element than `boundaries`.
The learning rate is computed as follows:
[0, boundaries[0]] -> values[0]
[boundaries[0], boundaries[1]] -> values[1]
...
[boundaries[n-1], boundaries[n]] -> values[n]
[boundaries[n], end] -> values[n+1]
Defaults to None.
"""
name: str = 'PiecewiseConstantDecay'
boundaries: Optional[List[int]] = None
values: Optional[List[float]] = None
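As an illustrative sketch of the example above (values copied from the Keras docs snippet), the config fields map directly onto the arguments of the PiecewiseConstantDecay schedule that the factory builds later in this commit:

```python
# Sketch: the Keras-docs example expressed as a StepwiseLrConfig.
from official.modeling.optimization.configs import learning_rate_config as lr_cfg

stepwise = lr_cfg.StepwiseLrConfig(
    boundaries=[100000, 110000],  # step boundaries between constant intervals
    values=[1.0, 0.5, 0.1])       # one more value than boundaries
# The optimizer factory feeds these fields into
# tf.keras.optimizers.schedules.PiecewiseConstantDecay(**stepwise.as_dict()).
```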
@dataclasses.dataclass
class ExponentialLrConfig(base_config.Config):
"""Configuration for exponential learning rate decay.
This class is a container for the exponential learning rate decay configs.
Attributes:
name: The name of the learning rate schedule. Defaults to ExponentialDecay.
initial_learning_rate: A float. The initial learning rate. Defaults to
None.
decay_steps: A positive integer that is used for decay computation.
Defaults to None.
decay_rate: A float. Defaults to None.
staircase: A boolean; if true, the learning rate is decayed at discrete
intervals. Defaults to None (equivalent to False).
"""
name: str = 'ExponentialDecay'
initial_learning_rate: Optional[float] = None
decay_steps: Optional[int] = None
decay_rate: Optional[float] = None
staircase: Optional[bool] = None
@dataclasses.dataclass
class PolynomialLrConfig(base_config.Config):
"""Configuration for polynomial learning rate decay.
This class is a container for the polynomial learning rate decay configs.
Attributes:
name: The name of the learning rate schedule. Defaults to PolynomialDecay.
initial_learning_rate: A float. The initial learning rate. Defaults to
None.
decay_steps: A positive integer that is used for decay computation.
Defaults to None.
end_learning_rate: A float. The minimal end learning rate.
power: A float. The power of the polynomial. Defaults to linear, 1.0.
cycle: A boolean, whether or not it should cycle beyond decay_steps.
Defaults to False.
"""
name: str = 'PolynomialDecay'
initial_learning_rate: Optional[float] = None
decay_steps: Optional[int] = None
end_learning_rate: float = 0.0001
power: float = 1.0
cycle: bool = False
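For a concrete feel of these defaults, here is a hand-computed sketch assuming the standard tf.keras PolynomialDecay formula for cycle=False: lr(step) = (initial - end) * (1 - step/decay_steps)**power + end.

```python
# Sketch: PolynomialDecay by hand, with the values used in the factory tests
# later in this commit (initial 0.1, end 0.001, 1000 decay steps, power 1.0).
init_lr, end_lr, decay_steps, power = 0.1, 0.001, 1000, 1.0
step = 500
lr_at_step = (init_lr - end_lr) * (1 - step / decay_steps) ** power + end_lr
print(lr_at_step)  # 0.0505, the value asserted in optimizer_factory_test.py
```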
@dataclasses.dataclass
class LinearWarmupConfig(base_config.Config):
"""Configuration for linear warmup schedule config.
This class is a container for the linear warmup schedule configs.
Warmup_learning_rate is the initial learning rate, the final learning rate of
the warmup period is the learning_rate of the optimizer in use. The learning
rate at each step linearly increased according to the following formula:
warmup_learning_rate = warmup_learning_rate +
step / warmup_steps * (final_learning_rate - warmup_learning_rate).
Using warmup overrides the learning rate schedule by the number of warmup
steps.
Attributes:
name: The name of warmup schedule. Defaults to LinearWarmup.
warmup_learning_rate: Initial learning rate for the warmup. Defaults to 0.
warmup_steps: Warmup steps. Defaults to None.
"""
name: str = 'LinearWarmup'
warmup_learning_rate: float = 0
warmup_steps: Optional[int] = None
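A small numeric sketch of the interpolation formula above, using the warmup values that appear in the factory tests later in this commit (warmup_learning_rate 0.01, warmup_steps 500, final learning rate 0.1):

```python
# Sketch: plain-Python linear warmup interpolation.
warmup_learning_rate, warmup_steps, final_learning_rate = 0.01, 500, 0.1

def warmup_lr(step):
  # Linear ramp from warmup_learning_rate up to final_learning_rate.
  return (warmup_learning_rate +
          step / warmup_steps * (final_learning_rate - warmup_learning_rate))

print(warmup_lr(0), warmup_lr(250), warmup_lr(500))  # 0.01 0.055 0.1
```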
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Dataclasses for optimization configs.
This file defines the dataclass for optimization configs (OptimizationConfig).
It also has two helper functions, get_optimizer_config and get_lr_config, for
extracting configs from an OptimizationConfig instance.
"""
from typing import Optional
import dataclasses
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import oneof
from official.modeling.optimization.configs import learning_rate_config as lr_cfg
from official.modeling.optimization.configs import optimizer_config as opt_cfg
@dataclasses.dataclass
class OptimizerConfig(oneof.OneOfConfig):
"""Configuration for optimizer.
Attributes:
type: 'str', type of optimizer to be used, one of the fields below.
sgd: sgd optimizer config.
adam: adam optimizer config.
adamw: adam with weight decay.
lamb: lamb optimizer.
"""
type: Optional[str] = None
sgd: opt_cfg.SGDConfig = opt_cfg.SGDConfig()
adam: opt_cfg.AdamConfig = opt_cfg.AdamConfig()
adamw: opt_cfg.AdamWeightDecayConfig = opt_cfg.AdamWeightDecayConfig()
lamb: opt_cfg.LAMBConfig = opt_cfg.LAMBConfig()
@dataclasses.dataclass
class LrConfig(oneof.OneOfConfig):
"""Configuration for lr schedule.
Attributes:
type: 'str', type of lr schedule to be used, one of the fields below.
stepwise: stepwise learning rate config.
exponential: exponential learning rate config.
polynomial: polynomial learning rate config.
"""
type: Optional[str] = None
stepwise: lr_cfg.StepwiseLrConfig = lr_cfg.StepwiseLrConfig()
exponential: lr_cfg.ExponentialLrConfig = lr_cfg.ExponentialLrConfig()
polynomial: lr_cfg.PolynomialLrConfig = lr_cfg.PolynomialLrConfig()
@dataclasses.dataclass
class WarmupConfig(oneof.OneOfConfig):
"""Configuration for lr schedule.
Attributes:
type: 'str', type of warmup schedule to be used, on the of fields below.
linear: linear warmup config.
"""
type: Optional[str] = None
linear: lr_cfg.LinearWarmupConfig = lr_cfg.LinearWarmupConfig()
@dataclasses.dataclass
class OptimizationConfig(base_config.Config):
"""Configuration for optimizer and learning rate schedule.
Attributes:
optimizer: optimizer oneof config.
learning_rate: learning rate oneof config.
warmup: warmup oneof config.
"""
optimizer: OptimizerConfig = OptimizerConfig()
learning_rate: LrConfig = LrConfig()
warmup: WarmupConfig = WarmupConfig()
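A hedged usage sketch: an OptimizationConfig is typically built from a nested dict, with each oneof field selected by its 'type' key, the same pattern used in the tests below; the specific adam/exponential values here are only illustrative.

```python
# Sketch: constructing an OptimizationConfig from a nested dict.
from official.modeling.optimization.configs import optimization_config

opt_config = optimization_config.OptimizationConfig({
    'optimizer': {'type': 'adam', 'adam': {'learning_rate': 0.001}},
    'learning_rate': {'type': 'exponential',
                      'exponential': {'initial_learning_rate': 0.1,
                                      'decay_steps': 1000,
                                      'decay_rate': 0.96}},
    'warmup': {'type': 'linear', 'linear': {'warmup_steps': 500}},
})
print(opt_config.optimizer.get())  # AdamConfig with learning_rate=0.001
```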
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimization_config.py."""
import tensorflow as tf
from official.modeling.optimization.configs import learning_rate_config as lr_cfg
from official.modeling.optimization.configs import optimization_config
from official.modeling.optimization.configs import optimizer_config as opt_cfg
class OptimizerConfigTest(tf.test.TestCase):
def test_no_optimizer(self):
optimizer = optimization_config.OptimizationConfig({}).optimizer.get()
self.assertEqual(optimizer, None)
def test_no_lr_schedule(self):
lr = optimization_config.OptimizationConfig({}).learning_rate.get()
self.assertEqual(lr, None)
def test_no_warmup_schedule(self):
warmup = optimization_config.OptimizationConfig({}).warmup.get()
self.assertEqual(warmup, None)
def test_config(self):
opt_config = optimization_config.OptimizationConfig({
'optimizer': {
'type': 'sgd',
'sgd': {} # default config
},
'learning_rate': {
'type': 'polynomial',
'polynomial': {}
},
'warmup': {
'type': 'linear'
}
})
self.assertEqual(opt_config.optimizer.get(),
opt_cfg.SGDConfig())
self.assertEqual(opt_config.learning_rate.get(),
lr_cfg.PolynomialLrConfig())
self.assertEqual(opt_config.warmup.get(),
lr_cfg.LinearWarmupConfig())
if __name__ == '__main__':
tf.test.main()
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Dataclasses for optimizer configs."""
from typing import List, Optional
import dataclasses
from official.modeling.hyperparams import base_config
@dataclasses.dataclass
class SGDConfig(base_config.Config):
"""Configuration for SGD optimizer.
The attributes of this class match the arguments of tf.keras.optimizers.SGD.
Attributes:
name: name of the optimizer.
learning_rate: learning_rate for SGD optimizer.
decay: decay rate for SGD optimizer.
nesterov: nesterov for SGD optimizer.
momentum: momentum for SGD optimizer.
"""
name: str = "SGD"
learning_rate: float = 0.01
decay: float = 0.0
nesterov: bool = False
momentum: float = 0.0
@dataclasses.dataclass
class AdamConfig(base_config.Config):
"""Configuration for Adam optimizer.
The attributes of this class match the arguments of
tf.keras.optimizers.Adam.
Attributes:
name: name of the optimizer.
learning_rate: learning_rate for Adam optimizer.
beta_1: decay rate for 1st order moments.
beta_2: decay rate for 2nd order moments.
epsilon: epsilon value used for numerical stability in Adam optimizer.
amsgrad: boolean. Whether to apply AMSGrad variant of this algorithm from
the paper "On the Convergence of Adam and Beyond".
"""
name: str = "Adam"
learning_rate: float = 0.001
beta_1: float = 0.9
beta_2: float = 0.999
epsilon: float = 1e-07
amsgrad: bool = False
@dataclasses.dataclass
class AdamWeightDecayConfig(base_config.Config):
"""Configuration for Adam optimizer with weight decay.
Attributes:
name: name of the optimizer.
learning_rate: learning_rate for the optimizer.
beta_1: decay rate for 1st order moments.
beta_2: decay rate for 2nd order moments.
epsilon: epsilon value used for numerical stability in the optimizer.
amsgrad: boolean. Whether to apply AMSGrad variant of this algorithm from
the paper "On the Convergence of Adam and Beyond".
weight_decay_rate: float. Weight decay rate. Defaults to 0.
include_in_weight_decay: list[str], or None. List of weight names to include
in weight decay.
exclude_from_weight_decay: list[str], or None. List of weight names to
exclude from weight decay.
"""
name: str = "AdamWeightDecay"
learning_rate: float = 0.001
beta_1: float = 0.9
beta_2: float = 0.999
epsilon: float = 1e-07
amsgrad: bool = False
weight_decay_rate: float = 0.0
include_in_weight_decay: Optional[List[str]] = None
exclude_from_weight_decay: Optional[List[str]] = None
@dataclasses.dataclass
class LAMBConfig(base_config.Config):
"""Configuration for LAMB optimizer.
The attributes of this class match the arguments of
tensorflow_addons.optimizers.LAMB.
Attributes:
name: name of the optimizer.
learning_rate: learning_rate for LAMB optimizer.
beta_1: decay rate for 1st order moments.
beta_2: decay rate for 2nd order moments.
epsilon: epsilon value used for numerical stability in LAMB optimizer.
weight_decay_rate: float. Weight decay rate. Defaults to 0.
exclude_from_weight_decay: List of regex patterns of variables excluded from
weight decay. Variables whose name contain a
substring matching the pattern will be excluded.
exclude_from_layer_adaptation: List of regex patterns of variables excluded
from layer adaptation. Variables whose name
contain a substring matching the pattern will
be excluded.
"""
name: str = "LAMB"
learning_rate: float = 0.001
beta_1: float = 0.9
beta_2: float = 0.999
epsilon: float = 1e-6
weight_decay_rate: float = 0.0
exclude_from_weight_decay: Optional[List[str]] = None
exclude_from_layer_adaptation: Optional[List[str]] = None
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Learning rate schedule classes."""
from typing import Mapping, Any, Union, Optional
import tensorflow as tf
class LinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Linear warmup schedule."""
def __init__(self, after_warmup_lr_sched: Union[
tf.keras.optimizers.schedules.LearningRateSchedule, float],
warmup_steps: int, warmup_learning_rate: float,
name: Optional[str] = None):
"""Add linear warmup schedule to a learning rate schedule.
warmup_learning_rate is the initial learning rate of the warmup period; the
final learning rate of the warmup period is the learning rate that
after_warmup_lr_sched produces at warmup_steps. The learning rate at each
step is linearly increased according to the following formula:
learning_rate = warmup_learning_rate + step / warmup_steps
* (final_warmup_lr - warmup_learning_rate).
Using warmup overrides the learning rate schedule for the first warmup_steps
steps.
Args:
after_warmup_lr_sched: tf.keras.optimizers.schedules
.LearningRateSchedule or a constant.
warmup_steps: int. Number of warmup steps.
warmup_learning_rate: floating point number. Initial learning rate for the
warmup.
name: Optional, name of warmup schedule.
"""
super(LinearWarmup, self).__init__()
self._name = name
self._after_warmup_lr_sched = after_warmup_lr_sched
self._warmup_steps = warmup_steps
self._init_warmup_lr = warmup_learning_rate
if isinstance(after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
self._final_warmup_lr = after_warmup_lr_sched(warmup_steps)
else:
self._final_warmup_lr = tf.cast(
after_warmup_lr_sched, dtype=tf.float32)
def __call__(self, step: int):
global_step = tf.cast(step, dtype=tf.float32)
linear_warmup_lr = (
self._init_warmup_lr + global_step / self._warmup_steps *
(self._final_warmup_lr - self._init_warmup_lr))
if isinstance(self._after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
after_warmup_lr = self._after_warmup_lr_sched(step)
else:
after_warmup_lr = tf.cast(self._after_warmup_lr_sched, dtype=tf.float32)
lr = tf.cond(global_step < self._warmup_steps,
lambda: linear_warmup_lr,
lambda: after_warmup_lr)
return lr
def get_config(self) -> Mapping[str, Any]:
if isinstance(self._after_warmup_lr_sched,
tf.keras.optimizers.schedules.LearningRateSchedule):
name = "{!s}WithWarmup".format(self._after_warmup_lr_sched.name) # pytype: disable=attribute-error
config = self._after_warmup_lr_sched.get_config() # pytype: disable=attribute-error
else:
name = "ConstantWithWarmup"
config = {"learning_rate": self._after_warmup_lr_sched}
config.update({
"warmup_steps": self._warmup_steps,
"warmup_learning_rate": self._init_warmup_lr,
"name": name
})
return config
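A minimal sketch of wrapping a schedule with this class (the constants match the warmup test added later in this commit):

```python
# Sketch: wrap a piecewise-constant schedule with LinearWarmup.
import tensorflow as tf
from official.modeling.optimization import lr_schedule

base = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[10000, 20000], values=[0.1, 0.01, 0.001])
lr = lr_schedule.LinearWarmup(
    after_warmup_lr_sched=base, warmup_steps=500, warmup_learning_rate=0.01)
print(lr(250).numpy())   # ~0.055, halfway through the linear warmup
print(lr(5000).numpy())  # 0.1, the wrapped schedule takes over after warmup
```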
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Optimizer factory class."""
from typing import Union
import tensorflow as tf
import tensorflow_addons.optimizers as tfa_optimizers
from official.modeling.optimization import lr_schedule
from official.modeling.optimization.configs import optimization_config as opt_cfg
from official.nlp import optimization as nlp_optimization
OPTIMIZERS_CLS = {
'sgd': tf.keras.optimizers.SGD,
'adam': tf.keras.optimizers.Adam,
'adamw': nlp_optimization.AdamWeightDecay,
'lamb': tfa_optimizers.LAMB
}
LR_CLS = {
'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay,
'polynomial': tf.keras.optimizers.schedules.PolynomialDecay,
'exponential': tf.keras.optimizers.schedules.ExponentialDecay,
}
WARMUP_CLS = {
'linear': lr_schedule.LinearWarmup
}
class OptimizerFactory(object):
"""Optimizer factory class.
This class builds learning rate and optimizer based on an optimization config.
To use this class, you need to do the following:
(1) Define an optimization config; this includes the optimizer and the
learning rate schedule.
(2) Initialize the class using the optimization config.
(3) Build learning rate.
(4) Build optimizer.
This is a typical example for using this class:
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]}
},
'warmup': {
'type': 'linear',
'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01}
}
}
opt_config = OptimizationConfig(params)
opt_factory = OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
optimizer = opt_factory.build_optimizer(lr)
"""
def __init__(self, config: opt_cfg.OptimizationConfig):
"""Initializing OptimizerFactory.
Args:
config: OptimizationConfig instance contain optimization config.
"""
self._config = config
self._optimizer_config = config.optimizer.get()
self._optimizer_type = config.optimizer.type
if self._optimizer_config is None:
raise ValueError('Optimizer type must be specified')
self._lr_config = config.learning_rate.get()
self._lr_type = config.learning_rate.type
self._warmup_config = config.warmup.get()
self._warmup_type = config.warmup.type
def build_learning_rate(self):
"""Build learning rate.
Builds learning rate from config. Learning rate schedule is built according
to the learning rate config. If there is no learning rate config, optimizer
learning rate is returned.
Returns:
tf.keras.optimizers.schedules.LearningRateSchedule instance. If no
learning rate schedule defined, optimizer_config.learning_rate is
returned.
"""
if not self._lr_config:
lr = self._optimizer_config.learning_rate
else:
lr = LR_CLS[self._lr_type](**self._lr_config.as_dict())
if self._warmup_config:
lr = WARMUP_CLS[self._warmup_type](lr, **self._warmup_config.as_dict())
return lr
def build_optimizer(
self, lr: Union[tf.keras.optimizers.schedules.LearningRateSchedule,
float]):
"""Build optimizer.
Builds optimizer from config. It takes learning rate as input, and builds
the optimizer according to the optimizer config. Typically, the learning
rate built using self.build_learning_rate() is passed as an argument to this
method.
Args:
lr: A floating point value, or
a tf.keras.optimizers.schedules.LearningRateSchedule instance.
Returns:
tf.keras.optimizers.Optimizer instance.
"""
optimizer_dict = self._optimizer_config.as_dict()
optimizer_dict['learning_rate'] = lr
optimizer = OPTIMIZERS_CLS[self._optimizer_type](**optimizer_dict)
return optimizer
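As an end-to-end sketch (the one-layer model is a stand-in, not part of this commit), the factory output can be handed straight to a Keras model:

```python
# Sketch: build an optimizer from a config and use it to compile a toy model.
import tensorflow as tf
from official.modeling.optimization import optimizer_factory
from official.modeling.optimization.configs import optimization_config

params = {
    'optimizer': {'type': 'sgd', 'sgd': {'learning_rate': 0.1, 'momentum': 0.9}},
    'learning_rate': {'type': 'stepwise',
                      'stepwise': {'boundaries': [10000, 20000],
                                   'values': [0.1, 0.01, 0.001]}},
}
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate())

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])  # stand-in model
model.compile(optimizer=optimizer, loss='mse')
```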
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimizer_factory.py."""
import tensorflow as tf
import tensorflow_addons.optimizers as tfa_optimizers
from official.modeling.optimization import optimizer_factory
from official.modeling.optimization.configs import optimization_config
from official.nlp import optimization as nlp_optimization
class OptimizerFactoryTest(tf.test.TestCase):
def test_sgd_optimizer(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
}
}
expected_optimizer_config = {
'name': 'SGD',
'learning_rate': 0.1,
'decay': 0.0,
'momentum': 0.9,
'nesterov': False
}
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
optimizer = opt_factory.build_optimizer(lr)
self.assertIsInstance(optimizer, tf.keras.optimizers.SGD)
self.assertEqual(expected_optimizer_config, optimizer.get_config())
def test_adam_optimizer(self):
# Define adam optimizer with default values.
params = {
'optimizer': {
'type': 'adam'
}
}
expected_optimizer_config = tf.keras.optimizers.Adam().get_config()
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
optimizer = opt_factory.build_optimizer(lr)
self.assertIsInstance(optimizer, tf.keras.optimizers.Adam)
self.assertEqual(expected_optimizer_config, optimizer.get_config())
def test_adam_weight_decay_optimizer(self):
params = {
'optimizer': {
'type': 'adamw'
}
}
expected_optimizer_config = nlp_optimization.AdamWeightDecay().get_config()
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
optimizer = opt_factory.build_optimizer(lr)
self.assertIsInstance(optimizer, nlp_optimization.AdamWeightDecay)
self.assertEqual(expected_optimizer_config, optimizer.get_config())
def test_lamb_optimizer(self):
params = {
'optimizer': {
'type': 'lamb'
}
}
expected_optimizer_config = tfa_optimizers.LAMB().get_config()
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
optimizer = opt_factory.build_optimizer(lr)
self.assertIsInstance(optimizer, tfa_optimizers.LAMB)
self.assertEqual(expected_optimizer_config, optimizer.get_config())
def test_stepwise_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]}
}
}
expected_lr_step_values = [
[0, 0.1],
[5000, 0.1],
[10000, 0.1],
[10001, 0.01],
[20000, 0.01],
[20001, 0.001]
]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_stepwise_lr_with_warmup_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {'boundaries': [10000, 20000],
'values': [0.1, 0.01, 0.001]}
},
'warmup': {
'type': 'linear',
'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01}
}
}
expected_lr_step_values = [
[0, 0.01],
[250, 0.055],
[500, 0.1],
[5500, 0.1],
[10000, 0.1],
[10001, 0.01],
[20000, 0.01],
[20001, 0.001]
]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_exponential_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'learning_rate': {
'type': 'exponential',
'exponential': {
'initial_learning_rate': 0.1,
'decay_steps': 1000,
'decay_rate': 0.96,
'staircase': True
}
}
}
expected_lr_step_values = [
[0, 0.1],
[999, 0.1],
[1000, 0.096],
[1999, 0.096],
[2000, 0.09216],
]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_polynomial_lr_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'learning_rate': {
'type': 'polynomial',
'polynomial': {
'initial_learning_rate': 0.1,
'decay_steps': 1000,
'end_learning_rate': 0.001
}
}
}
expected_lr_step_values = [[0, 0.1], [500, 0.0505], [1000, 0.001]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
def test_constant_lr_with_warmup_schedule(self):
params = {
'optimizer': {
'type': 'sgd',
'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 500,
'warmup_learning_rate': 0.01
}
}
}
expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1], [5000, 0.1],
[10000, 0.1], [20000, 0.1]]
opt_config = optimization_config.OptimizationConfig(params)
opt_factory = optimizer_factory.OptimizerFactory(opt_config)
lr = opt_factory.build_learning_rate()
for step, value in expected_lr_step_values:
self.assertAlmostEqual(lr(step).numpy(), value)
if __name__ == '__main__':
tf.test.main()