ModelZoo / ResNet50_tensorflow

Commit 28cbb02d
Authored Aug 27, 2020 by Abdullah Rashwan
Committed by A. Unique TensorFlower on Aug 27, 2020
Parent: 5a1bce51

Internal change

PiperOrigin-RevId: 328803102
Showing 7 changed files with 287 additions and 234 deletions (+287, -234).
- official/modeling/optimization/__init__.py (+1, -0)
- official/modeling/optimization/configs/optimization_config.py (+3, -0)
- official/modeling/optimization/configs/optimizer_config.py (+16, -0)
- official/modeling/optimization/ema_optimizer.py (+243, -0)
- official/modeling/optimization/optimizer_factory.py (+11, -2)
- official/vision/image_classification/callbacks.py (+7, -5)
- official/vision/image_classification/optimizer_factory.py (+6, -227)
official/modeling/optimization/__init__.py

```diff
@@ -4,4 +4,5 @@
 from official.modeling.optimization.configs.learning_rate_config import *
 from official.modeling.optimization.configs.optimization_config import *
 from official.modeling.optimization.configs.optimizer_config import *
+from official.modeling.optimization.ema_optimizer import ExponentialMovingAverage
 from official.modeling.optimization.optimizer_factory import OptimizerFactory
```
official/modeling/optimization/configs/optimization_config.py

```diff
@@ -91,9 +91,12 @@ class OptimizationConfig(base_config.Config):
   Attributes:
     optimizer: optimizer oneof config.
+    ema: optional exponential moving average optimizer config, if specified,
+      ema optimizer will be used.
     learning_rate: learning rate oneof config.
     warmup: warmup oneof config.
   """
   optimizer: OptimizerConfig = OptimizerConfig()
+  ema: Optional[opt_cfg.EMAConfig] = None
   learning_rate: LrConfig = LrConfig()
   warmup: WarmupConfig = WarmupConfig()
```
official/modeling/optimization/configs/optimizer_config.py

```diff
@@ -136,3 +136,19 @@ class LAMBConfig(base_config.Config):
   weight_decay_rate: float = 0.0
   exclude_from_weight_decay: Optional[List[str]] = None
   exclude_from_layer_adaptation: Optional[List[str]] = None
+
+
+@dataclasses.dataclass
+class EMAConfig(base_config.Config):
+  """Exponential moving average optimizer config.
+
+  Attributes:
+    name: 'str', name of the optimizer.
+    average_decay: 'float', average decay value.
+    start_step: 'int', start step to apply moving average.
+    dynamic_decay: 'bool', whether to apply dynamic decay or not.
+  """
+  name: str = "ExponentialMovingAverage"
+  average_decay: float = 0.99
+  start_step: int = 0
+  dynamic_decay: bool = True
```
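Taken together, the new `ema` field on `OptimizationConfig` and the `EMAConfig` dataclass let EMA be switched on from the optimization config alone. A minimal sketch of what that could look like follows; the `optimizer` and `learning_rate` entries are illustrative assumptions (only the `ema` field and the `EMAConfig` defaults come from this commit), and the dict-style construction assumes the usual Model Garden `base_config.Config` behavior:

```python
# Hypothetical config sketch: only the `ema` section is introduced by this
# commit; the optimizer/learning_rate values are placeholders.
from official.modeling.optimization.configs import optimization_config

opt_config = optimization_config.OptimizationConfig({
    'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
    'learning_rate': {'type': 'constant', 'constant': {'learning_rate': 0.1}},
    'ema': {'average_decay': 0.9999, 'start_step': 1000},
})

# When `ema` is left out it defaults to None and no EMA wrapping happens.
assert opt_config.ema is not None
```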
official/modeling/optimization/ema_optimizer.py (new file, mode 100644)

````python
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Exponential moving average optimizer."""

from typing import Text, List

import tensorflow as tf

# pylint: disable=protected-access


class ExponentialMovingAverage(tf.keras.optimizers.Optimizer):
  """Optimizer that computes an exponential moving average of the variables.

  Empirically it has been found that using the moving average of the trained
  parameters of a deep network is better than using its trained parameters
  directly. This optimizer allows you to compute this moving average and swap
  the variables at save time so that any code outside of the training loop
  will use by default the average values instead of the original ones.

  Example of usage for training:
  ```python
  opt = tf.keras.optimizers.SGD(learning_rate)
  opt = ExponentialMovingAverage(opt)
  opt.shadow_copy(model)
  ```

  At test time, swap the shadow variables to evaluate on the averaged weights:
  ```python
  opt.swap_weights()
  # Test eval the model here
  opt.swap_weights()
  ```
  """

  def __init__(self,
               optimizer: tf.keras.optimizers.Optimizer,
               average_decay: float = 0.99,
               start_step: int = 0,
               dynamic_decay: bool = True,
               name: Text = 'ExponentialMovingAverage',
               **kwargs):
    """Construct a new ExponentialMovingAverage optimizer.

    Args:
      optimizer: `tf.keras.optimizers.Optimizer` that will be
        used to compute and apply gradients.
      average_decay: float. Decay to use to maintain the moving averages
        of trained variables.
      start_step: int. What step to start the moving average.
      dynamic_decay: bool. Whether to change the decay based on the number
        of optimizer updates. Decay will start at 0.1 and gradually increase
        up to `average_decay` after each optimizer update. This behavior is
        similar to `tf.train.ExponentialMovingAverage` in TF 1.x.
      name: Optional name for the operations created when applying
        gradients. Defaults to "moving_average".
      **kwargs: keyword arguments. Allowed to be {`clipnorm`,
        `clipvalue`, `lr`, `decay`}.
    """
    super(ExponentialMovingAverage, self).__init__(name, **kwargs)
    self._average_decay = average_decay
    self._start_step = tf.constant(start_step, tf.float32)
    self._dynamic_decay = dynamic_decay
    self._optimizer = optimizer
    self._track_trackable(self._optimizer, 'base_optimizer')

  def shadow_copy(self, model: tf.keras.Model):
    """Creates shadow variables for the given model weights."""
    for var in model.weights:
      self.add_slot(var, 'average', initializer='zeros')
    self._average_weights = [
        self.get_slot(var, 'average') for var in model.weights
    ]
    self._model_weights = model.weights

  @property
  def has_shadow_copy(self):
    """Whether this optimizer has created shadow variables."""
    return self._model_weights is not None

  def _create_slots(self, var_list):
    self._optimizer._create_slots(var_list=var_list)  # pylint: disable=protected-access

  def apply_gradients(self, grads_and_vars, name: Text = None):
    result = self._optimizer.apply_gradients(grads_and_vars, name)
    self.update_average(self.iterations)
    return result

  @tf.function
  def update_average(self, step: tf.Tensor):
    step = tf.cast(step, tf.float32)
    if step < self._start_step:
      decay = tf.constant(0., tf.float32)
    elif self._dynamic_decay:
      decay = step - self._start_step
      decay = tf.minimum(self._average_decay, (1. + decay) / (10. + decay))
    else:
      decay = self._average_decay

    def _apply_moving(v_moving, v_normal):
      diff = v_moving - v_normal
      v_moving.assign_sub(tf.cast(1. - decay, v_moving.dtype) * diff)
      return v_moving

    def _update(strategy, v_moving_and_v_normal):
      for v_moving, v_normal in v_moving_and_v_normal:
        strategy.extended.update(v_moving, _apply_moving, args=(v_normal,))

    ctx = tf.distribute.get_replica_context()
    return ctx.merge_call(
        _update, args=(zip(self._average_weights, self._model_weights),))

  def swap_weights(self):
    """Swap the average and moving weights.

    This is a convenience method to allow one to evaluate the averaged weights
    at test time. Loads the weights stored in `self._average` into the model,
    keeping a copy of the original model weights. Swapping twice will return
    the original weights.
    """
    if tf.distribute.in_cross_replica_context():
      strategy = tf.distribute.get_strategy()
      strategy.run(self._swap_weights, args=())
    else:
      raise ValueError('Swapping weights must occur under a '
                       'tf.distribute.Strategy')

  @tf.function
  def _swap_weights(self):
    def fn_0(a, b):
      a.assign_add(b)
      return a

    def fn_1(b, a):
      b.assign(a - b)
      return b

    def fn_2(a, b):
      a.assign_sub(b)
      return a

    def swap(strategy, a_and_b):
      """Swap `a` and `b` and mirror to all devices."""
      for a, b in a_and_b:
        strategy.extended.update(a, fn_0, args=(b,))  # a = a + b
        strategy.extended.update(b, fn_1, args=(a,))  # b = a - b
        strategy.extended.update(a, fn_2, args=(b,))  # a = a - b

    ctx = tf.distribute.get_replica_context()
    return ctx.merge_call(
        swap, args=(zip(self._average_weights, self._model_weights),))

  def assign_average_vars(self, var_list: List[tf.Variable]):
    """Assign variables in var_list with their respective averages.

    Args:
      var_list: List of model variables to be assigned to their average.

    Returns:
      assign_op: The op corresponding to the assignment operation of
        variables to their average.
    """
    assign_op = tf.group([
        var.assign(self.get_slot(var, 'average'))
        for var in var_list
        if var.trainable
    ])
    return assign_op

  def _create_hypers(self):
    self._optimizer._create_hypers()  # pylint: disable=protected-access

  def _prepare(self, var_list):
    return self._optimizer._prepare(var_list=var_list)  # pylint: disable=protected-access

  @property
  def iterations(self):
    return self._optimizer.iterations

  @iterations.setter
  def iterations(self, variable):
    self._optimizer.iterations = variable

  @property
  def weights(self):
    # return self._weights + self._optimizer.weights
    return self._optimizer.weights

  def variables(self):
    return self._weights + [self.iterations]

  @property
  def lr(self):
    return self._optimizer._get_hyper('learning_rate')

  @lr.setter
  def lr(self, lr):
    self._optimizer._set_hyper('learning_rate', lr)

  @property
  def learning_rate(self):
    return self._optimizer._get_hyper('learning_rate')

  @learning_rate.setter
  def learning_rate(self, learning_rate):  # pylint: disable=redefined-outer-name
    self._optimizer._set_hyper('learning_rate', learning_rate)

  def _resource_apply_dense(self, grad, var):
    return self._optimizer._resource_apply_dense(grad, var)

  def _resource_apply_sparse(self, grad, var, indices):
    return self._optimizer._resource_apply_sparse(grad, var, indices)

  def _resource_apply_sparse_duplicate_indices(self, grad, var, indices):
    return self._optimizer._resource_apply_sparse_duplicate_indices(
        grad, var, indices)

  def get_config(self):
    config = {
        'optimizer': tf.keras.optimizers.serialize(self._optimizer),
        'average_decay': self._average_decay,
        'start_step': self._start_step,
        'dynamic_decay': self._dynamic_decay,
    }
    base_config = super(ExponentialMovingAverage, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    optimizer = tf.keras.optimizers.deserialize(
        config.pop('optimizer'),
        custom_objects=custom_objects,
    )
    return cls(optimizer, **config)
````
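Beyond the docstring snippet, the intended train/eval flow looks roughly like the sketch below. It assumes the default (single-device) distribution strategy; the toy model, data, and hyperparameters are illustrative and not part of the commit:

```python
import numpy as np
import tensorflow as tf

from official.modeling.optimization.ema_optimizer import ExponentialMovingAverage

# Toy model and data, purely illustrative.
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
x = np.random.rand(32, 4).astype('float32')
y = np.random.rand(32, 1).astype('float32')
loss_fn = tf.keras.losses.MeanSquaredError()

opt = ExponentialMovingAverage(
    tf.keras.optimizers.SGD(learning_rate=0.1), average_decay=0.99)
opt.shadow_copy(model)  # creates the 'average' slot for every model weight

for _ in range(10):
  with tf.GradientTape() as tape:
    loss = loss_fn(y, model(x))
  grads = tape.gradient(loss, model.trainable_variables)
  # apply_gradients delegates to SGD, then updates the moving averages.
  opt.apply_gradients(zip(grads, model.trainable_variables))

# Evaluate on the averaged weights, then swap the raw weights back in.
opt.swap_weights()
eval_loss = loss_fn(y, model(x))
opt.swap_weights()
print(float(eval_loss))
```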
official/modeling/optimization/optimizer_factory.py

```diff
@@ -16,10 +16,11 @@
 """Optimizer factory class."""
 from typing import Union

 import tensorflow as tf
 import tensorflow_addons.optimizers as tfa_optimizers

+from official.modeling.optimization import ema_optimizer
 from official.modeling.optimization import lr_schedule
 from official.modeling.optimization.configs import optimization_config as opt_cfg
 from official.nlp import optimization as nlp_optimization
```

```diff
@@ -89,7 +90,10 @@ class OptimizerFactory(object):
     self._optimizer_config = config.optimizer.get()
     self._optimizer_type = config.optimizer.type
-    if self._optimizer_type is None:
+
+    self._use_ema = config.ema is not None
+    self._ema_config = config.ema
+
+    if self._optimizer_config is None:
       raise ValueError('Optimizer type must be specified')

     self._lr_config = config.learning_rate.get()
```

```diff
@@ -143,4 +147,9 @@ class OptimizerFactory(object):
     optimizer_dict['learning_rate'] = lr

     optimizer = OPTIMIZERS_CLS[self._optimizer_type](**optimizer_dict)
+
+    if self._use_ema:
+      optimizer = ema_optimizer.ExponentialMovingAverage(
+          optimizer, **self._ema_config.as_dict())
+
     return optimizer
```
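The factory change is exercised roughly as below. The `build_learning_rate()` and `build_optimizer()` method names and the nested config values are assumptions based on the surrounding Model Garden code rather than on the hunks shown in this commit:

```python
# Hypothetical sketch of driving OptimizerFactory with EMA enabled.
from official.modeling import optimization
from official.modeling.optimization.configs import optimization_config

opt_config = optimization_config.OptimizationConfig({
    'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
    'learning_rate': {'type': 'constant', 'constant': {'learning_rate': 0.1}},
    'ema': {'average_decay': 0.99, 'start_step': 0},
})
factory = optimization.OptimizerFactory(opt_config)

lr = factory.build_learning_rate()       # assumed factory method
optimizer = factory.build_optimizer(lr)  # assumed factory method
# Because config.ema is set, `optimizer` is an
# ema_optimizer.ExponentialMovingAverage wrapping the plain SGD optimizer.
```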
official/vision/image_classification/callbacks.py

```diff
@@ -25,8 +25,8 @@ from typing import Any, List, MutableMapping, Text
 from absl import logging
 import tensorflow as tf

+from official.modeling import optimization
 from official.utils.misc import keras_utils
-from official.vision.image_classification import optimizer_factory


 def get_callbacks(model_checkpoint: bool = True,
```

```diff
@@ -165,7 +165,7 @@ class CustomTensorBoard(tf.keras.callbacks.TensorBoard):
 class MovingAverageCallback(tf.keras.callbacks.Callback):
-  """A Callback to be used with a `MovingAverage` optimizer.
+  """A Callback to be used with a `ExponentialMovingAverage` optimizer.

   Applies moving average weights to the model during validation time to test
   and predict on the averaged weights rather than the current model weights.
```

```diff
@@ -184,7 +184,8 @@ class MovingAverageCallback(tf.keras.callbacks.Callback):
   def set_model(self, model: tf.keras.Model):
     super(MovingAverageCallback, self).set_model(model)
-    assert isinstance(self.model.optimizer, optimizer_factory.MovingAverage)
+    assert isinstance(self.model.optimizer,
+                      optimization.ExponentialMovingAverage)
     self.model.optimizer.shadow_copy(self.model)

   def on_test_begin(self, logs: MutableMapping[Text, Any] = None):
```

```diff
@@ -225,13 +226,14 @@ class AverageModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
         save_weights_only, mode, save_freq, **kwargs)

   def set_model(self, model):
-    if not isinstance(model.optimizer, optimizer_factory.MovingAverage):
+    if not isinstance(model.optimizer, optimization.ExponentialMovingAverage):
       raise TypeError('AverageModelCheckpoint is only used when training'
                       'with MovingAverage')
     return super().set_model(model)

   def _save_model(self, epoch, logs):
-    assert isinstance(self.model.optimizer, optimizer_factory.MovingAverage)
+    assert isinstance(self.model.optimizer,
+                      optimization.ExponentialMovingAverage)
     if self.update_weights:
       self.model.optimizer.assign_average_vars(self.model.variables)
```
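With the import switched from the vision-local `optimizer_factory.MovingAverage` to the shared `optimization.ExponentialMovingAverage`, the callbacks are wired up the same way as before. A rough, self-contained sketch follows; the `MovingAverageCallback()` and `AverageModelCheckpoint(update_weights=..., filepath=...)` constructor arguments are assumptions inferred from the base classes and the `update_weights` attribute above, not shown in this diff:

```python
# Hypothetical wiring of the EMA optimizer with the vision callbacks;
# the callback constructor arguments are assumptions.
import numpy as np
import tensorflow as tf

from official.modeling import optimization
from official.vision.image_classification import callbacks as cb

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
x = np.random.rand(32, 4).astype('float32')
y = np.random.rand(32, 1).astype('float32')

optimizer = optimization.ExponentialMovingAverage(
    tf.keras.optimizers.SGD(0.1), average_decay=0.999)
model.compile(optimizer=optimizer, loss='mse')
optimizer.shadow_copy(model)

training_callbacks = [
    cb.MovingAverageCallback(),                     # swaps weights around eval
    cb.AverageModelCheckpoint(update_weights=True,  # checkpoints averaged weights
                              filepath='/tmp/ema_ckpt'),
]
model.fit(x, y, epochs=1, callbacks=training_callbacks)
```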
official/vision/image_classification/optimizer_factory.py

```diff
@@ -18,241 +18,19 @@ from __future__ import division
 # from __future__ import google_type_annotations
 from __future__ import print_function

-from typing import Any, Dict, Text, List
+from typing import Any, Dict, Text

 from absl import logging
 import tensorflow as tf
 import tensorflow_addons as tfa

+from official.modeling import optimization
 from official.vision.image_classification import learning_rate
 from official.vision.image_classification.configs import base_configs

 # pylint: disable=protected-access

-class MovingAverage(tf.keras.optimizers.Optimizer):
-  """Optimizer that computes a moving average of the variables.
-  ... (class body elided; see note below) ...

 def build_optimizer(
     optimizer_name: Text,
     base_learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule,
```

The deleted `MovingAverage` class (roughly 220 lines, elided above) is the same implementation this commit adds to `official/modeling/optimization/ema_optimizer.py` as `ExponentialMovingAverage`, apart from the default `name='moving_average'`, the tracking of the wrapped optimizer, and the new `variables()` helper.

```diff
@@ -269,7 +47,7 @@ def build_optimizer(
       should contain optimizer specific parameters such as `base_learning_rate`,
       `decay`, etc.
     model: The `tf.keras.Model`. This is used for the shadow copy if using
-      `MovingAverage`.
+      `ExponentialMovingAverage`.

   Returns:
     A tf.keras.Optimizer.
```

```diff
@@ -336,9 +114,10 @@ def build_optimizer(
   moving_average_decay = params.get('moving_average_decay', 0.)
   if moving_average_decay is not None and moving_average_decay > 0.:
     if model is None:
-      raise ValueError('`model` must be provided if using `MovingAverage`.')
+      raise ValueError(
+          '`model` must be provided if using `ExponentialMovingAverage`.')
     logging.info('Including moving average decay.')
-    optimizer = MovingAverage(
+    optimizer = optimization.ExponentialMovingAverage(
         optimizer=optimizer, average_decay=moving_average_decay)
     optimizer.shadow_copy(model)
   return optimizer
```
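For the legacy image-classification path the call site is unchanged: passing `moving_average_decay` in `params` still yields an EMA-wrapped optimizer, it simply comes from the shared implementation now. A rough sketch, where the `optimizer_name` value and the extra `params` keys are assumptions based on the truncated hunks above:

```python
# Hypothetical call-site sketch for the refactored build_optimizer.
import tensorflow as tf

from official.modeling import optimization
from official.vision.image_classification import optimizer_factory

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
lr = tf.keras.optimizers.schedules.ExponentialDecay(0.1, 1000, 0.9)

optimizer = optimizer_factory.build_optimizer(
    optimizer_name='momentum',  # assumed to be a supported optimizer name
    base_learning_rate=lr,
    params={'momentum': 0.9, 'moving_average_decay': 0.9999},
    model=model)

# The wrapper now comes from official.modeling.optimization.
assert isinstance(optimizer, optimization.ExponentialMovingAverage)
```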