ModelZoo / ResNet50_tensorflow · Commits

Commit 32e4ca51, authored Nov 28, 2023 by qianyj
Update code to v2.11.0
Parents: 9485aa1d, 71060f67

The commit changes 772+ files in total; this page shows 20 changed files with 158 additions and 47 deletions (+158 −47).
Files on this page:

  official/modeling/multitask/configs.py                              +4   −1
  official/modeling/multitask/evaluator.py                            +1   −1
  official/modeling/multitask/evaluator_test.py                       +1   −1
  official/modeling/multitask/interleaving_trainer.py                 +11  −2
  official/modeling/multitask/interleaving_trainer_test.py            +1   −1
  official/modeling/multitask/multitask.py                            +11  −7
  official/modeling/multitask/task_sampler.py                         +1   −1
  official/modeling/multitask/task_sampler_test.py                    +1   −1
  official/modeling/multitask/test_utils.py                           +5   −1
  official/modeling/multitask/train_lib.py                            +25  −15
  official/modeling/multitask/train_lib_test.py                       +4   −2
  official/modeling/optimization/__init__.py                          +1   −1
  official/modeling/optimization/adafactor_optimizer.py               +1   −1
  official/modeling/optimization/configs/__init__.py                  +1   −1
  official/modeling/optimization/configs/learning_rate_config.py      +4   −4
  official/modeling/optimization/configs/optimization_config.py       +7   −1
  official/modeling/optimization/configs/optimization_config_test.py  +1   −1
  official/modeling/optimization/configs/optimizer_config.py          +74  −1
  official/modeling/optimization/ema_optimizer.py                     +2   −2
  official/modeling/optimization/lars_optimizer.py                    +2   −2
official/modeling/multitask/configs.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -19,6 +19,7 @@ import dataclasses
 from official.core import config_definitions as cfg
 from official.modeling import hyperparams
+from official.modeling.privacy import configs as dp_configs


 @dataclasses.dataclass
 ...

@@ -35,6 +36,8 @@ class MultiTaskConfig(hyperparams.Config):
   init_checkpoint: str = ""
   model: hyperparams.Config = None
   task_routines: Tuple[TaskRoutine, ...] = ()
+  differential_privacy_config: Optional[
+      dp_configs.DifferentialPrivacyConfig] = None


 @dataclasses.dataclass
 ...
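
The new `differential_privacy_config` field threads differential-privacy settings into the multi-task experiment config. A minimal sketch of populating it, assuming the `official` package is importable and that `DifferentialPrivacyConfig` exposes `clipping_norm` and `noise_multiplier` fields (those field names and the values are assumptions, not shown in this diff):

```python
from official.modeling.multitask import configs
from official.modeling.privacy import configs as dp_configs

# Sketch only: all other MultiTaskConfig fields keep their defaults.
multi_task_config = configs.MultiTaskConfig(
    differential_privacy_config=dp_configs.DifferentialPrivacyConfig(
        clipping_norm=1.0,       # assumed field: per-sample gradient clip
        noise_multiplier=0.5,    # assumed field: Gaussian noise scale
    ))
print(multi_task_config.differential_privacy_config)
```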
official/modeling/multitask/evaluator.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/modeling/multitask/evaluator_test.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/modeling/multitask/interleaving_trainer.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -31,7 +31,9 @@ class MultiTaskInterleavingTrainer(base_trainer.MultiTaskBaseTrainer):
                multi_task: multitask.MultiTask,
                multi_task_model: Union[tf.keras.Model,
                                        base_model.MultiTaskBaseModel],
-               optimizer: tf.optimizers.Optimizer,
+               optimizer: Union[tf.optimizers.Optimizer,
+                                tf.keras.optimizers.experimental.Optimizer,
+                                tf.keras.optimizers.legacy.Optimizer],
                task_sampler: sampler.TaskSampler,
                trainer_options=None):
     super().__init__(
 ...

@@ -69,6 +71,13 @@ class MultiTaskInterleavingTrainer(base_trainer.MultiTaskBaseTrainer):
         name: orbit.utils.create_global_step() for name in self.multi_task.tasks
     }

+    # If the new Keras optimizer is used, we require all model variables are
+    # created before the training and let the optimizer to create the slot
+    # variable all together.
+    if isinstance(optimizer, tf.keras.optimizers.experimental.Optimizer):
+      multi_task_model.build()
+      optimizer.build(multi_task_model.trainable_variables)
+
   def task_step_counter(self, name):
     return self._task_step_counters[name]
 ...
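
The new `isinstance` branch exists because the experimental Keras optimizers create their slot variables lazily; building the model and calling `optimizer.build(...)` up front keeps variable creation out of the traced training step. A standalone sketch of the same pattern with a plain Keras model rather than this trainer's `MultiTaskBaseModel`:

```python
import tensorflow as tf

# Sketch: when a new-style (experimental) Keras optimizer is used, create all
# model variables first, then let the optimizer allocate its slot variables
# in one pass, mirroring the branch added above.
model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])
optimizer = tf.keras.optimizers.experimental.SGD(momentum=0.9)

if isinstance(optimizer, tf.keras.optimizers.experimental.Optimizer):
  model.build(input_shape=(None, 8))           # force variable creation
  optimizer.build(model.trainable_variables)   # pre-create slot variables
```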
official/modeling/multitask/interleaving_trainer_test.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/modeling/multitask/multitask.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -23,9 +23,11 @@ from official.core import task_factory
 from official.modeling import optimization
 from official.modeling.multitask import base_model
 from official.modeling.multitask import configs
+from official.modeling.privacy import configs as dp_configs

 OptimizationConfig = optimization.OptimizationConfig
 RuntimeConfig = config_definitions.RuntimeConfig
+DifferentialPrivacyConfig = dp_configs.DifferentialPrivacyConfig


 class MultiTask(tf.Module, metaclass=abc.ABCMeta):
 ...

@@ -93,9 +95,11 @@ class MultiTask(tf.Module, metaclass=abc.ABCMeta):
   @classmethod
   def create_optimizer(cls,
                        optimizer_config: OptimizationConfig,
-                       runtime_config: Optional[RuntimeConfig] = None):
+                       runtime_config: Optional[RuntimeConfig] = None,
+                       dp_config: Optional[DifferentialPrivacyConfig] = None):
     return base_task.Task.create_optimizer(
-        optimizer_config=optimizer_config, runtime_config=runtime_config)
+        optimizer_config=optimizer_config,
+        runtime_config=runtime_config,
+        dp_config=dp_config)

   def joint_train_step(self, task_inputs,
                        multi_task_model: base_model.MultiTaskBaseModel,
 ...

@@ -134,10 +138,10 @@ class MultiTask(tf.Module, metaclass=abc.ABCMeta):
         self.tasks[name].process_metrics(task_metrics[name], labels, outputs,
                                          **kwargs)

-        # Scales loss as the default gradients allreduce performs sum inside
-        # the optimizer.
-        scaled_loss = total_loss / tf.distribute.get_strategy(
-        ).num_replicas_in_sync
+      # Scales loss as the default gradients allreduce performs sum inside
+      # the optimizer.
+      scaled_loss = total_loss / tf.distribute.get_strategy(
+      ).num_replicas_in_sync
     tvars = multi_task_model.trainable_variables
     grads = tape.gradient(scaled_loss, tvars)
     optimizer.apply_gradients(list(zip(grads, tvars)))
 ...
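
With the extra parameter, `MultiTask.create_optimizer` simply forwards the privacy settings to `base_task.Task.create_optimizer`. A hedged sketch of a call site; the optimization values are illustrative, and `DifferentialPrivacyConfig()` is used with whatever defaults it provides:

```python
from official.modeling import optimization
from official.modeling.multitask import multitask
from official.modeling.privacy import configs as dp_configs

# Illustrative values; only the call shape follows the new signature above.
opt_config = optimization.OptimizationConfig({
    'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}},
    'learning_rate': {'type': 'constant', 'constant': {'learning_rate': 0.1}},
})

optimizer = multitask.MultiTask.create_optimizer(
    optimizer_config=opt_config,
    runtime_config=None,
    dp_config=dp_configs.DifferentialPrivacyConfig())
```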
official/modeling/multitask/task_sampler.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/modeling/multitask/task_sampler_test.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/modeling/multitask/test_utils.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -28,6 +28,8 @@ class MockFooModel(tf.keras.Model):
     super().__init__(*args, **kwargs)
     self._share_layer = shared_layer
     self._foo_specific_layer = tf.keras.layers.Dense(1)
+    self.inputs = {"foo": tf.keras.Input(shape=(2,), dtype=tf.float32),
+                   "bar": tf.keras.Input(shape=(2,), dtype=tf.float32)}

   def call(self, inputs):
     self.add_loss(tf.zeros((1,), dtype=tf.float32))
 ...

@@ -39,11 +41,13 @@ class MockFooModel(tf.keras.Model):
 class MockBarModel(tf.keras.Model):
   """A mock model can only consume 'bar' inputs."""

   def __init__(self, shared_layer, *args, **kwargs):
     super().__init__(*args, **kwargs)
     self._share_layer = shared_layer
     self._bar_specific_layer = tf.keras.layers.Dense(1)
+    self.inputs = {
+        "bar": tf.keras.Input(shape=(2,), dtype=tf.float32)}

   def call(self, inputs):
     self.add_loss(tf.zeros((2,), dtype=tf.float32))
 ...
official/modeling/multitask/train_lib.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -15,7 +15,7 @@
 """Multitask training driver library."""
 # pytype: disable=attribute-error
 import os
-from typing import Any, List, Optional, Tuple
+from typing import Any, List, Mapping, Optional, Tuple, Union
 from absl import logging
 import orbit
 import tensorflow as tf
 ...

@@ -44,8 +44,12 @@ def run_experiment(
     mode: str,
     params: configs.MultiTaskExperimentConfig,
     model_dir: str,
-    trainer: base_trainer.MultiTaskBaseTrainer = None
-) -> base_model.MultiTaskBaseModel:
+    run_post_eval: bool = False,
+    trainer: base_trainer.MultiTaskBaseTrainer = None,
+    best_ckpt_exporter_creator: Optional[Any] = train_utils
+    .maybe_create_best_ckpt_exporter
+) -> Union[base_model.MultiTaskBaseModel,
+           Tuple[base_model.MultiTaskBaseModel, Mapping[Any, Any]]]:
   """Runs train/eval configured by the experiment params.

   Args:
 ...

@@ -56,8 +60,11 @@ def run_experiment(
       or 'continuous_eval'.
     params: ExperimentConfig instance.
     model_dir: A 'str', a path to store model checkpoints and summaries.
+    run_post_eval: Whether to run post eval once after training, metrics logs
+      are returned.
     trainer: (optional) A multi-task trainer to use. If none is provided, a
       default one will be created based on `params`.
+    best_ckpt_exporter_creator: A functor for creating best checkpoint exporter.

   Returns:
     model: `base_model.MultiTaskBaseModel` instance.
 ...

@@ -66,8 +73,7 @@ def run_experiment(
   is_training = 'train' in mode
   is_eval = 'eval' in mode
   with distribution_strategy.scope():
-    optimizer = task.create_optimizer(params.trainer.optimizer_config,
-                                      params.runtime)
+    optimizer = train_utils.create_optimizer(task, params)
     kwargs = dict(multi_task=task, multi_task_model=model, optimizer=optimizer)
     if params.trainer.trainer_type == 'interleaving':
       sampler = task_sampler.get_task_sampler(params.trainer.task_sampler,
 ...

@@ -83,8 +89,7 @@ def run_experiment(
         model=model,
         eval_steps=eval_steps,
         global_step=trainer.global_step if is_training else None,
-        checkpoint_exporter=train_utils.maybe_create_best_ckpt_exporter(
-            params, model_dir))
+        checkpoint_exporter=best_ckpt_exporter_creator(params, model_dir))
   else:
     evaluator = None
 ...

@@ -95,7 +100,6 @@ def run_experiment(
     checkpoint = evaluator.checkpoint
     global_step = evaluator.global_step
-  # TODO(hongkuny,haozhangthu): Revisit initialization method.
   checkpoint_manager = tf.train.CheckpointManager(
       checkpoint,
       directory=model_dir,
 ...

@@ -140,7 +144,11 @@ def run_experiment(
   else:
     raise NotImplementedError('The mode is not implemented: %s' % mode)

-  return model
+  if run_post_eval:
+    return model, evaluator.evaluate(
+        tf.convert_to_tensor(params.trainer.validation_steps))  # pytype: disable=bad-return-type  # typed-keras
+  else:
+    return model


 def run_experiment_with_multitask_eval(
 ...

@@ -153,7 +161,10 @@ def run_experiment_with_multitask_eval(
     model_dir: str,
     run_post_eval: bool = False,
     save_summary: bool = True,
-    trainer: Optional[core_lib.Trainer] = None) -> Tuple[Any, Any]:
+    trainer: Optional[core_lib.Trainer] = None,
+    best_ckpt_exporter_creator: Optional[Any] = train_utils
+    .maybe_create_best_ckpt_exporter,
+) -> Tuple[Any, Any]:
   """Runs train/eval configured by the experiment params.

   Args:
 ...

@@ -170,6 +181,7 @@ def run_experiment_with_multitask_eval(
     trainer: the core_lib.Trainer instance. It should be created within the
       strategy.scope(). If not provided, an instance will be created by default
       if `mode` contains 'train'.
+    best_ckpt_exporter_creator: A functor for creating best checkpoint exporter.

   Returns:
     model: `tf.keras.Model` instance.
 ...

@@ -183,8 +195,7 @@ def run_experiment_with_multitask_eval(
         config=params,
         task=train_task,
         model=train_task.build_model(),
-        optimizer=train_task.create_optimizer(params.trainer.optimizer_config,
-                                              params.runtime),
+        optimizer=train_utils.create_optimizer(train_task, params),
         train=True,
         evaluate=False)
   else:
 ...

@@ -200,8 +211,7 @@ def run_experiment_with_multitask_eval(
         model=model,
         global_step=trainer.global_step if is_training else None,
         eval_steps=eval_steps,
-        checkpoint_exporter=train_utils.maybe_create_best_ckpt_exporter(
-            params, model_dir))
+        checkpoint_exporter=best_ckpt_exporter_creator(params, model_dir))
   else:
     evaluator = None
 ...
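
Taken together, the `run_experiment` changes mean the return value now depends on `run_post_eval`: callers get just the model by default, or a `(model, metrics)` tuple when `run_post_eval=True`. A hedged sketch modeled on `train_lib_test.py`; `FooConfig` and the leading keyword names are assumptions taken from the test and docstring, and trainer fields such as `train_steps` and `validation_steps` would still need sensible values for a real run:

```python
import tensorflow as tf
from official.modeling.multitask import configs
from official.modeling.multitask import multitask
from official.modeling.multitask import test_utils
from official.modeling.multitask import train_lib

experiment_config = configs.MultiTaskExperimentConfig(
    task=configs.MultiTaskConfig(
        task_routines=(
            configs.TaskRoutine(task_name='foo',
                                task_config=test_utils.FooConfig()),
            configs.TaskRoutine(task_name='bar',
                                task_config=test_utils.BarConfig()))))
experiment_config.trainer.train_steps = 10        # illustrative
experiment_config.trainer.validation_steps = 2    # consumed by run_post_eval

strategy = tf.distribute.get_strategy()
with strategy.scope():
  task = multitask.MultiTask.from_config(experiment_config.task)
  model = test_utils.MockMultiTaskModel()

# run_post_eval=True returns (model, metrics mapping) instead of just model.
trained_model, eval_logs = train_lib.run_experiment(
    distribution_strategy=strategy,
    task=task,
    model=model,
    mode='train_and_eval',
    params=experiment_config,
    model_dir='/tmp/multitask_example',
    run_post_eval=True)
```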
official/modeling/multitask/train_lib_test.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -58,8 +58,9 @@ class TrainLibTest(tf.test.TestCase, parameterized.TestCase):
               strategy_combinations.one_device_strategy_gpu,
           ],
           mode='eager',
+          optimizer=['sgd_experimental', 'sgd'],
           flag_mode=['train', 'eval', 'train_and_eval']))
-  def test_end_to_end(self, distribution_strategy, flag_mode):
+  def test_end_to_end(self, distribution_strategy, optimizer, flag_mode):
     model_dir = self.get_temp_dir()
     experiment_config = configs.MultiTaskExperimentConfig(
         task=configs.MultiTaskConfig(
 ...

@@ -70,6 +71,7 @@ class TrainLibTest(tf.test.TestCase, parameterized.TestCase):
                     task_name='bar', task_config=test_utils.BarConfig()))))
     experiment_config = params_dict.override_params_dict(
         experiment_config, self._test_config, is_strict=False)
+    experiment_config.trainer.optimizer_config.optimizer.type = optimizer
     with distribution_strategy.scope():
       test_multitask = multitask.MultiTask.from_config(experiment_config.task)
       model = test_utils.MockMultiTaskModel()
 ...
official/modeling/optimization/__init__.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/modeling/optimization/adafactor_optimizer.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/modeling/optimization/configs/__init__.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/modeling/optimization/configs/learning_rate_config.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -216,14 +216,14 @@ class StepCosineLrConfig(base_config.Config):
   """Configuration for stepwise learning rate decay.

   This class is a container for the piecewise cosine learning rate scheduling
-  configs. It will configure an instance of StepConsineDecayWithOffset keras
+  configs. It will configure an instance of StepCosineDecayWithOffset keras
   learning rate schedule.

   ```python
   boundaries: [100000, 110000]
   values: [1.0, 0.5]
   lr_decayed_fn = (
-    lr_schedule.StepConsineDecayWithOffset(
+    lr_schedule.StepCosineDecayWithOffset(
       boundaries,
       values))
   ```
 ...

@@ -243,7 +243,7 @@ class StepCosineLrConfig(base_config.Config):
       [boundaries[n], end] -> values[n+1] to 0.
     offset: An int. The offset applied to steps. Defaults to 0.
   """
-  name: str = 'StepConsineDecayWithOffset'
+  name: str = 'StepCosineDecayWithOffset'
   boundaries: Optional[List[int]] = None
   values: Optional[List[float]] = None
   offset: int = 0
 ...
official/modeling/optimization/configs/optimization_config.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -45,8 +45,14 @@ class OptimizerConfig(oneof.OneOfConfig):
   """
   type: Optional[str] = None
   sgd: opt_cfg.SGDConfig = opt_cfg.SGDConfig()
+  sgd_experimental: opt_cfg.SGDExperimentalConfig = (
+      opt_cfg.SGDExperimentalConfig())
   adam: opt_cfg.AdamConfig = opt_cfg.AdamConfig()
+  adam_experimental: opt_cfg.AdamExperimentalConfig = (
+      opt_cfg.AdamExperimentalConfig())
   adamw: opt_cfg.AdamWeightDecayConfig = opt_cfg.AdamWeightDecayConfig()
+  adamw_experimental: opt_cfg.AdamWeightDecayExperimentalConfig = (
+      opt_cfg.AdamWeightDecayExperimentalConfig())
   lamb: opt_cfg.LAMBConfig = opt_cfg.LAMBConfig()
   rmsprop: opt_cfg.RMSPropConfig = opt_cfg.RMSPropConfig()
   lars: opt_cfg.LARSConfig = opt_cfg.LARSConfig()
 ...
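
Because `OptimizerConfig` is a oneof config, the new experimental optimizers are selected through the `type` field exactly like the existing ones. A hedged sketch with illustrative values; calling `get()` on the oneof to retrieve the selected sub-config is an assumption about the oneof helper, not shown in this diff:

```python
from official.modeling import optimization

# Select the newly added experimental SGD variant via the oneof `type` field.
opt_config = optimization.OptimizationConfig({
    'optimizer': {
        'type': 'sgd_experimental',
        'sgd_experimental': {'momentum': 0.9, 'nesterov': True},
    },
    'learning_rate': {'type': 'constant', 'constant': {'learning_rate': 0.01}},
})
print(opt_config.optimizer.get())  # assumed: returns the selected sub-config
```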
official/modeling/optimization/configs/optimization_config_test.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/modeling/optimization/configs/optimizer_config.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -54,6 +54,27 @@ class SGDConfig(BaseOptimizerConfig):
   momentum: float = 0.0


+# TODO(b/216129465): Merge this config with SGDConfig after the experimental
+# optimizer graduates.
+@dataclasses.dataclass
+class SGDExperimentalConfig(BaseOptimizerConfig):
+  """Configuration for SGD optimizer.
+
+  The attributes for this class matches the arguments of
+  `tf.keras.optimizer.experimental.SGD`.
+
+  Attributes:
+    name: name of the optimizer.
+    nesterov: nesterov for SGD optimizer.
+    momentum: momentum for SGD optimizer.
+    jit_compile: if True, jit compile will be used.
+  """
+  name: str = "SGD"
+  nesterov: bool = False
+  momentum: float = 0.0
+  jit_compile: bool = False
+
+
 @dataclasses.dataclass
 class RMSPropConfig(BaseOptimizerConfig):
   """Configuration for RMSProp optimizer.
 ...

@@ -115,6 +136,30 @@ class AdamConfig(BaseOptimizerConfig):
   amsgrad: bool = False


+@dataclasses.dataclass
+class AdamExperimentalConfig(BaseOptimizerConfig):
+  """Configuration for experimental Adam optimizer.
+
+  The attributes for this class matches the arguments of
+  `tf.keras.optimizer.experimental.Adam`.
+
+  Attributes:
+    name: name of the optimizer.
+    beta_1: decay rate for 1st order moments.
+    beta_2: decay rate for 2st order moments.
+    epsilon: epsilon value used for numerical stability in Adam optimizer.
+    amsgrad: boolean. Whether to apply AMSGrad variant of this algorithm from
+      the paper "On the Convergence of Adam and beyond".
+    jit_compile: if True, jit compile will be used.
+  """
+  name: str = "Adam"
+  beta_1: float = 0.9
+  beta_2: float = 0.999
+  epsilon: float = 1e-07
+  amsgrad: bool = False
+  jit_compile: bool = False
+
+
 @dataclasses.dataclass
 class AdamWeightDecayConfig(BaseOptimizerConfig):
   """Configuration for Adam optimizer with weight decay.
 ...

@@ -145,6 +190,32 @@ class AdamWeightDecayConfig(BaseOptimizerConfig):
   gradient_clip_norm: float = 1.0


+@dataclasses.dataclass
+class AdamWeightDecayExperimentalConfig(BaseOptimizerConfig):
+  """Configuration for Adam optimizer with weight decay.
+
+  Attributes:
+    name: name of the optimizer.
+    beta_1: decay rate for 1st order moments.
+    beta_2: decay rate for 2st order moments.
+    epsilon: epsilon value used for numerical stability in the optimizer.
+    amsgrad: boolean. Whether to apply AMSGrad variant of this algorithm from
+      the paper "On the Convergence of Adam and beyond".
+    weight_decay: float. Weight decay rate. Default to 0.
+    global_clipnorm: A positive float. Clips the gradients to this maximum
+      L2-norm. Default to 1.0.
+    jit_compile: if True, jit compile will be used.
+  """
+  name: str = "AdamWeightDecayExperimental"
+  beta_1: float = 0.9
+  beta_2: float = 0.999
+  epsilon: float = 1e-07
+  amsgrad: bool = False
+  weight_decay: float = 0.0
+  global_clipnorm: float = 1.0
+  jit_compile: bool = False
+
+
 @dataclasses.dataclass
 class LAMBConfig(BaseOptimizerConfig):
   """Configuration for LAMB optimizer.
 ...

@@ -266,3 +337,5 @@ class AdafactorConfig(BaseOptimizerConfig):
   min_dim_size_to_factor: int = 128
   epsilon1: float = 1e-30
   epsilon2: float = 1e-3
+  weight_decay: Optional[float] = None
+  include_in_weight_decay: Optional[str] = None
official/modeling/optimization/ema_optimizer.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -21,7 +21,7 @@ import tensorflow as tf
 # pylint: disable=protected-access


-class ExponentialMovingAverage(tf.keras.optimizers.Optimizer):
+class ExponentialMovingAverage(tf.keras.optimizers.legacy.Optimizer):
   """Optimizer that computes an exponential moving average of the variables.

   Empirically it has been found that using the moving average of the trained
 ...
official/modeling/optimization/lars_optimizer.py  (view file @ 32e4ca51)

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -22,7 +22,7 @@ import tensorflow as tf
 # pylint: disable=protected-access


-class LARS(tf.keras.optimizers.Optimizer):
+class LARS(tf.keras.optimizers.legacy.Optimizer):
   """Layer-wise Adaptive Rate Scaling for large batch training.

   Introduced by "Large Batch Training of Convolutional Networks" by Y. You,
 ...
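
The `ExponentialMovingAverage` and `LARS` base-class changes pin these custom optimizers to the legacy Keras optimizer, since in TF 2.11 `tf.keras.optimizers.Optimizer` starts resolving to the new optimizer implementation, which has a different subclassing API. A hedged sketch of what staying on the legacy API looks like; the optimizer below is a toy, not part of this repository:

```python
import tensorflow as tf


class ToyLegacySGD(tf.keras.optimizers.legacy.Optimizer):
  """Toy optimizer written against the legacy (pre-2.11) subclassing API."""

  def __init__(self, learning_rate=0.01, name='ToyLegacySGD', **kwargs):
    super().__init__(name, **kwargs)
    self._set_hyper('learning_rate', learning_rate)

  def _resource_apply_dense(self, grad, var, apply_state=None):
    # Plain gradient descent step; real optimizers would also handle slots
    # and sparse gradients.
    lr = tf.cast(self._get_hyper('learning_rate'), var.dtype)
    return var.assign_sub(lr * grad)

  def get_config(self):
    config = super().get_config()
    config['learning_rate'] = self._serialize_hyperparameter('learning_rate')
    return config
```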