Commit 87ec3d2a — ModelZoo / ResNet50_tensorflow

Authored May 31, 2020 by Abdullah Rashwan; committed by A. Unique TensorFlower, May 31, 2020.

Internal change

PiperOrigin-RevId: 314002442
Parent: 4ce55184

Showing 9 changed files with 881 additions and 0 deletions (+881 / -0).
Files changed:
  official/modeling/optimization/__init__.py                          (+7 / -0)
  official/modeling/optimization/configs/__init__.py                  (+0 / -0)
  official/modeling/optimization/configs/learning_rate_config.py      (+125 / -0)
  official/modeling/optimization/configs/optimization_config.py       (+89 / -0)
  official/modeling/optimization/configs/optimization_config_test.py  (+61 / -0)
  official/modeling/optimization/configs/optimizer_config.py          (+125 / -0)
  official/modeling/optimization/lr_schedule.py                       (+94 / -0)
  official/modeling/optimization/optimizer_factory.py                 (+142 / -0)
  official/modeling/optimization/optimizer_factory_test.py            (+238 / -0)
official/modeling/optimization/__init__.py (new file, mode 100644)

"""Optimization package definition."""

# pylint: disable=wildcard-import
from official.modeling.optimization.configs.learning_rate_config import *
from official.modeling.optimization.configs.optimization_config import *
from official.modeling.optimization.configs.optimizer_config import *
from official.modeling.optimization.optimizer_factory import OptimizerFactory
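Because the package `__init__` re-exports the config dataclasses via wildcard imports and exposes `OptimizerFactory` directly, downstream code can presumably reach everything through one import. A minimal sketch, not part of this commit, assuming the `official` package is importable:

# Illustrative only: single-import access to the package surface above.
from official.modeling import optimization

config = optimization.OptimizationConfig({'optimizer': {'type': 'sgd'}})
factory = optimization.OptimizerFactory(config)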
official/modeling/optimization/configs/__init__.py (new file, mode 100644; empty)
official/modeling/optimization/configs/learning_rate_config.py (new file, mode 100644)

# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Dataclasses for learning rate schedule config."""
from typing import List, Optional

import dataclasses
from official.modeling.hyperparams import base_config


@dataclasses.dataclass
class StepwiseLrConfig(base_config.Config):
  """Configuration for stepwise learning rate decay.

  This class is a container for the piecewise constant learning rate
  scheduling configs. It will configure an instance of the
  PiecewiseConstantDecay keras learning rate schedule.

  An example (from keras docs): use a learning rate that's 1.0 for the first
  100001 steps, 0.5 for the next 10000 steps, and 0.1 for any additional steps.
  ```python
  boundaries: [100000, 110000]
  values: [1.0, 0.5, 0.1]
  ```

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PiecewiseConstantDecay.
    boundaries: A list of ints of strictly increasing entries. Defaults to
      None.
    values: A list of floats that specifies the values for the intervals
      defined by `boundaries`. It should have one more element than
      `boundaries`. The learning rate is computed as follows:
        [0, boundaries[0]] -> values[0]
        [boundaries[0], boundaries[1]] -> values[1]
        [boundaries[n-1], boundaries[n]] -> values[n]
        [boundaries[n], end] -> values[n+1]
      Defaults to None.
  """
  name: str = 'PiecewiseConstantDecay'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None


@dataclasses.dataclass
class ExponentialLrConfig(base_config.Config):
  """Configuration for exponential learning rate decay.

  This class is a container for the exponential learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      ExponentialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to
      None.
    decay_steps: A positive integer that is used for decay computation.
      Defaults to None.
    decay_rate: A float. Defaults to None.
    staircase: A boolean; if true, the learning rate is decreased at discrete
      intervals. Defaults to False.
  """
  name: str = 'ExponentialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  decay_rate: Optional[float] = None
  staircase: Optional[bool] = None


@dataclasses.dataclass
class PolynomialLrConfig(base_config.Config):
  """Configuration for polynomial learning rate decay.

  This class is a container for the polynomial learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to PolynomialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to
      None.
    decay_steps: A positive integer that is used for decay computation.
      Defaults to None.
    end_learning_rate: A float. The minimal end learning rate.
    power: A float. The power of the polynomial. Defaults to linear, 1.0.
    cycle: A boolean, whether or not it should cycle beyond decay_steps.
      Defaults to False.
  """
  name: str = 'PolynomialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  end_learning_rate: float = 0.0001
  power: float = 1.0
  cycle: bool = False


@dataclasses.dataclass
class LinearWarmupConfig(base_config.Config):
  """Configuration for linear warmup schedule config.

  This class is a container for the linear warmup schedule configs.
  warmup_learning_rate is the initial learning rate; the final learning rate
  of the warmup period is the learning_rate of the optimizer in use. The
  learning rate at each step is linearly increased according to the following
  formula:
    warmup_learning_rate = warmup_learning_rate +
      step / warmup_steps * (final_learning_rate - warmup_learning_rate).
  Using warmup overrides the learning rate schedule by the number of warmup
  steps.

  Attributes:
    name: The name of warmup schedule. Defaults to linear.
    warmup_learning_rate: Initial learning rate for the warmup. Defaults to 0.
    warmup_steps: Warmup steps. Defaults to None.
  """
  name: str = 'LinearWarmup'
  warmup_learning_rate: float = 0
  warmup_steps: Optional[int] = None
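To make the boundaries/values mapping in the StepwiseLrConfig docstring concrete, here is a small hand-check of the Keras schedule those fields are documented to configure. Illustrative sketch only, not part of this commit:

import tensorflow as tf

# The keras schedule that StepwiseLrConfig configures.
schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[100000, 110000], values=[1.0, 0.5, 0.1])

print(schedule(0).numpy())       # 1.0 -> steps [0, 100000]
print(schedule(100001).numpy())  # 0.5 -> steps (100000, 110000]
print(schedule(120000).numpy())  # 0.1 -> any step after 110000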
official/modeling/optimization/configs/optimization_config.py (new file, mode 100644)

# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Dataclasses for optimization configs.

This file defines the dataclass for optimization configs (OptimizationConfig).
It also has two helper functions, get_optimizer_config and get_lr_config, from
an OptimizationConfig class.
"""
from typing import Optional

import dataclasses
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import oneof
from official.modeling.optimization.configs import learning_rate_config as lr_cfg
from official.modeling.optimization.configs import optimizer_config as opt_cfg


@dataclasses.dataclass
class OptimizerConfig(oneof.OneOfConfig):
  """Configuration for optimizer.

  Attributes:
    type: 'str', type of optimizer to be used, one of the fields below.
    sgd: sgd optimizer config.
    adam: adam optimizer config.
    adamw: adam with weight decay.
    lamb: lamb optimizer.
  """
  type: Optional[str] = None
  sgd: opt_cfg.SGDConfig = opt_cfg.SGDConfig()
  adam: opt_cfg.AdamConfig = opt_cfg.AdamConfig()
  adamw: opt_cfg.AdamWeightDecayConfig = opt_cfg.AdamWeightDecayConfig()
  lamb: opt_cfg.LAMBConfig = opt_cfg.LAMBConfig()


@dataclasses.dataclass
class LrConfig(oneof.OneOfConfig):
  """Configuration for lr schedule.

  Attributes:
    type: 'str', type of lr schedule to be used, one of the fields below.
    stepwise: stepwise learning rate config.
    exponential: exponential learning rate config.
    polynomial: polynomial learning rate config.
  """
  type: Optional[str] = None
  stepwise: lr_cfg.StepwiseLrConfig = lr_cfg.StepwiseLrConfig()
  exponential: lr_cfg.ExponentialLrConfig = lr_cfg.ExponentialLrConfig()
  polynomial: lr_cfg.PolynomialLrConfig = lr_cfg.PolynomialLrConfig()


@dataclasses.dataclass
class WarmupConfig(oneof.OneOfConfig):
  """Configuration for warmup schedule.

  Attributes:
    type: 'str', type of warmup schedule to be used, one of the fields below.
    linear: linear warmup config.
  """
  type: Optional[str] = None
  linear: lr_cfg.LinearWarmupConfig = lr_cfg.LinearWarmupConfig()


@dataclasses.dataclass
class OptimizationConfig(base_config.Config):
  """Configuration for optimizer and learning rate schedule.

  Attributes:
    optimizer: optimizer oneof config.
    learning_rate: learning rate oneof config.
    warmup: warmup oneof config.
  """
  optimizer: OptimizerConfig = OptimizerConfig()
  learning_rate: LrConfig = LrConfig()
  warmup: WarmupConfig = WarmupConfig()
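Each oneof class resolves its active sub-config through the `type` field; `.get()` returns the selected dataclass, or None when no type is set, which is what the factory below consumes. A hedged sketch of that selection (the test file that follows exercises the same behavior):

# Illustrative only: how the oneof `type` field drives `.get()`.
from official.modeling.optimization.configs import optimization_config

config = optimization_config.OptimizationConfig({
    'optimizer': {'type': 'sgd', 'sgd': {'learning_rate': 0.1}},
    'learning_rate': {'type': 'stepwise'},
})

print(config.optimizer.type)                   # 'sgd'
print(config.optimizer.get().learning_rate)    # 0.1
print(config.warmup.get())                     # None -- no warmup type selected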
official/modeling/optimization/configs/optimization_config_test.py (new file, mode 100644)

# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimization_config.py."""

import tensorflow as tf

from official.modeling.optimization.configs import learning_rate_config as lr_cfg
from official.modeling.optimization.configs import optimization_config
from official.modeling.optimization.configs import optimizer_config as opt_cfg


class OptimizerConfigTest(tf.test.TestCase):

  def test_no_optimizer(self):
    optimizer = optimization_config.OptimizationConfig({}).optimizer.get()
    self.assertEqual(optimizer, None)

  def test_no_lr_schedule(self):
    lr = optimization_config.OptimizationConfig({}).learning_rate.get()
    self.assertEqual(lr, None)

  def test_no_warmup_schedule(self):
    warmup = optimization_config.OptimizationConfig({}).warmup.get()
    self.assertEqual(warmup, None)

  def test_config(self):
    opt_config = optimization_config.OptimizationConfig({
        'optimizer': {
            'type': 'sgd',
            'sgd': {}  # default config
        },
        'learning_rate': {
            'type': 'polynomial',
            'polynomial': {}
        },
        'warmup': {
            'type': 'linear'
        }
    })
    self.assertEqual(opt_config.optimizer.get(), opt_cfg.SGDConfig())
    self.assertEqual(opt_config.learning_rate.get(),
                     lr_cfg.PolynomialLrConfig())
    self.assertEqual(opt_config.warmup.get(), lr_cfg.LinearWarmupConfig())


if __name__ == '__main__':
  tf.test.main()
official/modeling/optimization/configs/optimizer_config.py (new file, mode 100644)

# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Dataclasses for optimizer configs."""
from typing import List, Optional

import dataclasses
from official.modeling.hyperparams import base_config


@dataclasses.dataclass
class SGDConfig(base_config.Config):
  """Configuration for SGD optimizer.

  The attributes of this class match the arguments of tf.keras.optimizer.SGD.

  Attributes:
    name: name of the optimizer.
    learning_rate: learning_rate for SGD optimizer.
    decay: decay rate for SGD optimizer.
    nesterov: nesterov for SGD optimizer.
    momentum: momentum for SGD optimizer.
  """
  name: str = "SGD"
  learning_rate: float = 0.01
  decay: float = 0.0
  nesterov: bool = False
  momentum: float = 0.0


@dataclasses.dataclass
class AdamConfig(base_config.Config):
  """Configuration for Adam optimizer.

  The attributes of this class match the arguments of
  tf.keras.optimizer.Adam.

  Attributes:
    name: name of the optimizer.
    learning_rate: learning_rate for Adam optimizer.
    beta_1: decay rate for 1st order moments.
    beta_2: decay rate for 2nd order moments.
    epsilon: epsilon value used for numerical stability in Adam optimizer.
    amsgrad: boolean. Whether to apply the AMSGrad variant of this algorithm
      from the paper "On the Convergence of Adam and beyond".
  """
  name: str = "Adam"
  learning_rate: float = 0.001
  beta_1: float = 0.9
  beta_2: float = 0.999
  epsilon: float = 1e-07
  amsgrad: bool = False


@dataclasses.dataclass
class AdamWeightDecayConfig(base_config.Config):
  """Configuration for Adam optimizer with weight decay.

  Attributes:
    name: name of the optimizer.
    learning_rate: learning_rate for the optimizer.
    beta_1: decay rate for 1st order moments.
    beta_2: decay rate for 2nd order moments.
    epsilon: epsilon value used for numerical stability in the optimizer.
    amsgrad: boolean. Whether to apply the AMSGrad variant of this algorithm
      from the paper "On the Convergence of Adam and beyond".
    weight_decay_rate: float. Weight decay rate. Defaults to 0.
    include_in_weight_decay: list[str], or None. List of weight names to
      include in weight decay.
    exclude_from_weight_decay: list[str], or None. List of weight names to
      exclude from weight decay.
  """
  name: str = "AdamWeightDecay"
  learning_rate: float = 0.001
  beta_1: float = 0.9
  beta_2: float = 0.999
  epsilon: float = 1e-07
  amsgrad: bool = False
  weight_decay_rate: float = 0.0
  include_in_weight_decay: Optional[List[str]] = None
  exclude_from_weight_decay: Optional[List[str]] = None


@dataclasses.dataclass
class LAMBConfig(base_config.Config):
  """Configuration for LAMB optimizer.

  The attributes of this class match the arguments of
  tensorflow_addons.optimizers.LAMB.

  Attributes:
    name: name of the optimizer.
    learning_rate: learning_rate for LAMB optimizer.
    beta_1: decay rate for 1st order moments.
    beta_2: decay rate for 2nd order moments.
    epsilon: epsilon value used for numerical stability in LAMB optimizer.
    weight_decay_rate: float. Weight decay rate. Defaults to 0.
    exclude_from_weight_decay: List of regex patterns of variables excluded
      from weight decay. Variables whose name contain a substring matching
      the pattern will be excluded.
    exclude_from_layer_adaptation: List of regex patterns of variables
      excluded from layer adaptation. Variables whose name contain a
      substring matching the pattern will be excluded.
  """
  name: str = "LAMB"
  learning_rate: float = 0.001
  beta_1: float = 0.9
  beta_2: float = 0.999
  epsilon: float = 1e-6
  weight_decay_rate: float = 0.0
  exclude_from_weight_decay: Optional[List[str]] = None
  exclude_from_layer_adaptation: Optional[List[str]] = None
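Because the attribute names mirror the Keras constructor arguments, a config can be expanded straight into the corresponding optimizer, which is the pattern optimizer_factory.py uses below. A minimal sketch (illustrative, not part of the commit):

import tensorflow as tf

from official.modeling.optimization.configs import optimizer_config as opt_cfg

# Default SGD config: name, learning_rate, decay, nesterov, momentum.
sgd_config = opt_cfg.SGDConfig()
sgd = tf.keras.optimizers.SGD(**sgd_config.as_dict())
print(sgd.get_config())  # defaults: learning_rate=0.01, momentum=0.0, ...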
official/modeling/optimization/lr_schedule.py (new file, mode 100644)

# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Learning rate schedule classes."""
from typing import Mapping, Any, Union, Optional

import tensorflow as tf


class LinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Linear warmup schedule."""

  def __init__(self,
               after_warmup_lr_sched: Union[
                   tf.keras.optimizers.schedules.LearningRateSchedule, float],
               warmup_steps: int,
               warmup_learning_rate: float,
               name: Optional[str] = None):
    """Adds a linear warmup schedule to a learning rate schedule.

    warmup_lr is the initial learning rate; the final learning rate of the
    warmup period is the initial learning rate of the lr_schedule in use.
    The learning rate at each step is linearly increased according to the
    following formula:
      learning_rate = warmup_lr + step / warmup_steps
                      * (final_warmup_lr - warmup_lr).
    Using warmup overrides the learning rate schedule by the number of warmup
    steps.

    Args:
      after_warmup_lr_sched: tf.keras.optimizers.schedules
        .LearningRateSchedule or a constant.
      warmup_steps: int. number of the warmup steps.
      warmup_learning_rate: floating point number. Initial learning rate for
        the warmup.
      name: Optional, name of warmup schedule.
    """
    super(LinearWarmup, self).__init__()
    self._name = name
    self._after_warmup_lr_sched = after_warmup_lr_sched
    self._warmup_steps = warmup_steps
    self._init_warmup_lr = warmup_learning_rate
    if isinstance(after_warmup_lr_sched,
                  tf.keras.optimizers.schedules.LearningRateSchedule):
      self._final_warmup_lr = after_warmup_lr_sched(warmup_steps)
    else:
      self._final_warmup_lr = tf.cast(after_warmup_lr_sched, dtype=tf.float32)

  def __call__(self, step: int):
    global_step = tf.cast(step, dtype=tf.float32)

    linear_warmup_lr = (
        self._init_warmup_lr + global_step / self._warmup_steps *
        (self._final_warmup_lr - self._init_warmup_lr))

    if isinstance(self._after_warmup_lr_sched,
                  tf.keras.optimizers.schedules.LearningRateSchedule):
      after_warmup_lr = self._after_warmup_lr_sched(step)
    else:
      after_warmup_lr = tf.cast(self._after_warmup_lr_sched, dtype=tf.float32)

    lr = tf.cond(global_step < self._warmup_steps,
                 lambda: linear_warmup_lr,
                 lambda: after_warmup_lr)
    return lr

  def get_config(self) -> Mapping[str, Any]:
    if isinstance(self._after_warmup_lr_sched,
                  tf.keras.optimizers.schedules.LearningRateSchedule):
      name = "{!s}WithWarmup".format(
          self._after_warmup_lr_sched.name)  # pytype: disable=attribute-error
      config = self._after_warmup_lr_sched.get_config()  # pytype: disable=attribute-error
    else:
      name = "ConstantWithWarmup"
      config = {"learning_rate": self._after_warmup_lr_sched}

    config.update({
        "warmup_steps": self._warmup_steps,
        "warmup_learning_rate": self._init_warmup_lr,
        "name": name
    })
    return config
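As a quick sanity check of the warmup formula above: wrapping a constant learning rate of 0.1 with a 500-step linear warmup starting at 0.01 yields 0.01 at step 0, 0.055 at step 250, and 0.1 from step 500 onwards, which matches the values the factory tests below assert. An illustrative sketch, not part of the commit:

import tensorflow as tf

from official.modeling.optimization import lr_schedule

warmed_up = lr_schedule.LinearWarmup(
    after_warmup_lr_sched=0.1,      # constant post-warmup learning rate
    warmup_steps=500,
    warmup_learning_rate=0.01)

for step in (0, 250, 500, 1000):
  print(step, warmed_up(step).numpy())  # 0.01, 0.055, 0.1, 0.1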
official/modeling/optimization/optimizer_factory.py (new file, mode 100644)

# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Optimizer factory class."""
from typing import Union

import tensorflow as tf
import tensorflow_addons.optimizers as tfa_optimizers

from official.modeling.optimization import lr_schedule
from official.modeling.optimization.configs import optimization_config as opt_cfg
from official.nlp import optimization as nlp_optimization

OPTIMIZERS_CLS = {
    'sgd': tf.keras.optimizers.SGD,
    'adam': tf.keras.optimizers.Adam,
    'adamw': nlp_optimization.AdamWeightDecay,
    'lamb': tfa_optimizers.LAMB
}

LR_CLS = {
    'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay,
    'polynomial': tf.keras.optimizers.schedules.PolynomialDecay,
    'exponential': tf.keras.optimizers.schedules.ExponentialDecay,
}

WARMUP_CLS = {'linear': lr_schedule.LinearWarmup}


class OptimizerFactory(object):
  """Optimizer factory class.

  This class builds a learning rate and an optimizer based on an optimization
  config. To use this class, you need to do the following:
  (1) Define an optimization config; this includes the optimizer and the
      learning rate schedule.
  (2) Initialize the class using the optimization config.
  (3) Build the learning rate.
  (4) Build the optimizer.

  This is a typical example of using this class:
  params = {
      'optimizer': {
          'type': 'sgd',
          'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
      },
      'learning_rate': {
          'type': 'stepwise',
          'stepwise': {'boundaries': [10000, 20000],
                       'values': [0.1, 0.01, 0.001]}
      },
      'warmup': {
          'type': 'linear',
          'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01}
      }
  }
  opt_config = OptimizationConfig(params)
  opt_factory = OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()
  optimizer = opt_factory.build_optimizer(lr)
  """

  def __init__(self, config: opt_cfg.OptimizationConfig):
    """Initializing OptimizerFactory.

    Args:
      config: OptimizationConfig instance containing the optimization config.
    """
    self._config = config
    self._optimizer_config = config.optimizer.get()
    self._optimizer_type = config.optimizer.type

    if self._optimizer_config is None:
      raise ValueError('Optimizer type must be specified')

    self._lr_config = config.learning_rate.get()
    self._lr_type = config.learning_rate.type

    self._warmup_config = config.warmup.get()
    self._warmup_type = config.warmup.type

  def build_learning_rate(self):
    """Build learning rate.

    Builds the learning rate from the config. The learning rate schedule is
    built according to the learning rate config. If there is no learning rate
    config, the optimizer learning rate is returned.

    Returns:
      tf.keras.optimizers.schedules.LearningRateSchedule instance. If no
      learning rate schedule is defined, optimizer_config.learning_rate is
      returned.
    """

    if not self._lr_config:
      lr = self._optimizer_config.learning_rate
    else:
      lr = LR_CLS[self._lr_type](**self._lr_config.as_dict())

    if self._warmup_config:
      lr = WARMUP_CLS[self._warmup_type](lr, **self._warmup_config.as_dict())

    return lr

  def build_optimizer(
      self, lr: Union[tf.keras.optimizers.schedules.LearningRateSchedule,
                      float]):
    """Build optimizer.

    Builds the optimizer from the config. It takes the learning rate as input,
    and builds the optimizer according to the optimizer config. Typically, the
    learning rate built using self.build_learning_rate() is passed as an
    argument to this method.

    Args:
      lr: A floating point value, or a
        tf.keras.optimizers.schedules.LearningRateSchedule instance.
    Returns:
      tf.keras.optimizers.Optimizer instance.
    """
    optimizer_dict = self._optimizer_config.as_dict()
    optimizer_dict['learning_rate'] = lr

    optimizer = OPTIMIZERS_CLS[self._optimizer_type](**optimizer_dict)

    return optimizer
official/modeling/optimization/optimizer_factory_test.py (new file, mode 100644)

# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimizer_factory.py."""
import tensorflow as tf
import tensorflow_addons.optimizers as tfa_optimizers

from official.modeling.optimization import optimizer_factory
from official.modeling.optimization.configs import optimization_config
from official.nlp import optimization as nlp_optimization


class OptimizerFactoryTest(tf.test.TestCase):

  def test_sgd_optimizer(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
        }
    }
    expected_optimizer_config = {
        'name': 'SGD',
        'learning_rate': 0.1,
        'decay': 0.0,
        'momentum': 0.9,
        'nesterov': False
    }
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()
    optimizer = opt_factory.build_optimizer(lr)

    self.assertIsInstance(optimizer, tf.keras.optimizers.SGD)
    self.assertEqual(expected_optimizer_config, optimizer.get_config())

  def test_adam_optimizer(self):
    # Define adam optimizer with default values.
    params = {
        'optimizer': {
            'type': 'adam'
        }
    }
    expected_optimizer_config = tf.keras.optimizers.Adam().get_config()
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()
    optimizer = opt_factory.build_optimizer(lr)

    self.assertIsInstance(optimizer, tf.keras.optimizers.Adam)
    self.assertEqual(expected_optimizer_config, optimizer.get_config())

  def test_adam_weight_decay_optimizer(self):
    params = {
        'optimizer': {
            'type': 'adamw'
        }
    }
    expected_optimizer_config = nlp_optimization.AdamWeightDecay().get_config()
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()
    optimizer = opt_factory.build_optimizer(lr)

    self.assertIsInstance(optimizer, nlp_optimization.AdamWeightDecay)
    self.assertEqual(expected_optimizer_config, optimizer.get_config())

  def test_lamb_optimizer(self):
    params = {
        'optimizer': {
            'type': 'lamb'
        }
    }
    expected_optimizer_config = tfa_optimizers.LAMB().get_config()
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()
    optimizer = opt_factory.build_optimizer(lr)

    self.assertIsInstance(optimizer, tfa_optimizers.LAMB)
    self.assertEqual(expected_optimizer_config, optimizer.get_config())

  def test_stepwise_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {'boundaries': [10000, 20000],
                         'values': [0.1, 0.01, 0.001]}
        }
    }
    expected_lr_step_values = [[0, 0.1], [5000, 0.1], [10000, 0.1],
                               [10001, 0.01], [20000, 0.01], [20001, 0.001]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_stepwise_lr_with_warmup_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {'boundaries': [10000, 20000],
                         'values': [0.1, 0.01, 0.001]}
        },
        'warmup': {
            'type': 'linear',
            'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01}
        }
    }
    expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1],
                               [5500, 0.1], [10000, 0.1], [10001, 0.01],
                               [20000, 0.01], [20001, 0.001]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_exponential_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
        },
        'learning_rate': {
            'type': 'exponential',
            'exponential': {'initial_learning_rate': 0.1,
                            'decay_steps': 1000,
                            'decay_rate': 0.96,
                            'staircase': True}
        }
    }
    expected_lr_step_values = [
        [0, 0.1],
        [999, 0.1],
        [1000, 0.096],
        [1999, 0.096],
        [2000, 0.09216],
    ]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_polynomial_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
        },
        'learning_rate': {
            'type': 'polynomial',
            'polynomial': {'initial_learning_rate': 0.1,
                           'decay_steps': 1000,
                           'end_learning_rate': 0.001}
        }
    }
    expected_lr_step_values = [[0, 0.1], [500, 0.0505], [1000, 0.001]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_constant_lr_with_warmup_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {'learning_rate': 0.1, 'momentum': 0.9}
        },
        'warmup': {
            'type': 'linear',
            'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01}
        }
    }
    expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1],
                               [5000, 0.1], [10000, 0.1], [20000, 0.1]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)


if __name__ == '__main__':
  tf.test.main()