Commit 124e3918 in ModelZoo / ResNet50_tensorflow

[core] Add tpu_enable_xla_dynamic_padder to RuntimeConfig.

Authored Mar 18, 2021 by Frederick Liu; committed by A. Unique TensorFlower, Mar 18, 2021.
PiperOrigin-RevId: 363595377
Parent: 5e5f12f3

Showing 3 changed files, with 26 additions and 2 deletions:

    official/core/base_trainer.py               +15  -2
    official/core/config_definitions.py         +10  -0
    official/modeling/progressive/trainer.py     +1  -0
official/core/base_trainer.py

@@ -158,6 +158,16 @@ class _AsyncTrainer(orbit.StandardTrainer, orbit.StandardEvaluator):
         *args, **kwargs)
 
 
+def get_runtime_options(config: ExperimentConfig):
+  """Get tf.distribute.RunOptions from config."""
+  xla_options = {}
+  if config.runtime.tpu_enable_xla_dynamic_padder is not None:
+    xla_options["enable_xla_dynamic_padder"] = (
+        config.runtime.tpu_enable_xla_dynamic_padder)
+  return tf.distribute.RunOptions(
+      experimental_xla_options=tf.tpu.XLAOptions(**xla_options))
+
+
 @gin.configurable
 class Trainer(_AsyncTrainer):
   """Implements the common trainer shared for TensorFlow models."""

@@ -195,6 +205,7 @@ class Trainer(_AsyncTrainer):
     self._optimizer = optimizer
     self._checkpoint_exporter = checkpoint_exporter
     self._recovery = None
+    self._runtime_options = get_runtime_options(config)
 
     # Creates a shadow copy of the weights to store weights moving average.
     if isinstance(self._optimizer, optimization.ExponentialMovingAverage):

@@ -374,7 +385,8 @@ class Trainer(_AsyncTrainer):
       self._train_loss.update_state(logs[self.task.loss])
       self.global_step.assign_add(1)
 
-    self.strategy.run(step_fn, args=(next(iterator),))
+    self.strategy.run(
+        step_fn, args=(next(iterator),), options=self._runtime_options)
 
   def eval_begin(self):
     """Sets up metrics."""

@@ -395,7 +407,8 @@ class Trainer(_AsyncTrainer):
       self._validation_loss.update_state(logs[self.task.loss])
       return logs
 
-    distributed_outputs = self.strategy.run(step_fn, args=(next(iterator),))
+    distributed_outputs = self.strategy.run(
+        step_fn, args=(next(iterator),), options=self._runtime_options)
     return tf.nest.map_structure(self.strategy.experimental_local_results,
                                  distributed_outputs)
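For orientation (not part of the commit): a minimal sketch of how the new helper behaves, assuming the Model Garden ExperimentConfig/RuntimeConfig dataclasses are constructible with their defaults. When the flag is left as None, the XLAOptions kwargs dict stays empty and XLA keeps its default padder behavior; when the flag is set, the value is carried into tf.distribute.RunOptions, which the Trainer now forwards on every strategy.run call.

    # A hedged sketch, assuming ExperimentConfig/RuntimeConfig accept these
    # keyword arguments with defaults, as in config_definitions.py.
    from official.core.base_trainer import get_runtime_options
    from official.core.config_definitions import ExperimentConfig, RuntimeConfig

    # Flag left as None: xla_options stays empty, so XLA keeps its default.
    opts = get_runtime_options(ExperimentConfig(runtime=RuntimeConfig()))

    # Flag set to False: the dynamic padder is disabled via tf.tpu.XLAOptions.
    opts = get_runtime_options(ExperimentConfig(
        runtime=RuntimeConfig(tpu_enable_xla_dynamic_padder=False)))
    assert opts.experimental_xla_options.enable_xla_dynamic_padder is False

    # The Trainer then threads these options into each step:
    #   self.strategy.run(step_fn, args=(next(iterator),),
    #                     options=self._runtime_options)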
official/core/config_definitions.py

@@ -140,6 +140,16 @@ class RuntimeConfig(base_config.Config):
   run_eagerly: bool = False
   batchnorm_spatial_persistent: bool = False
 
+  # XLA runtime
+  # Whether to enable the XLA dynamic padder
+  # infrastructure to handle dynamically shaped inputs inside XLA. True by
+  # default. Disabling this may cause correctness issues with dynamically
+  # shaped inputs, as XLA will then assume the inputs have padded shapes.
+  # However, users can optionally set it to False to improve device time if
+  # masking is already handled on the user side.
+  # If None, the XLA default is respected.
+  tpu_enable_xla_dynamic_padder: Optional[bool] = None
+
   # Global model parallelism configurations.
   num_cores_per_replica: int = 1
   default_shard_dim: int = -1
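To flip the flag without editing code, the change would go through the usual config-override path. A hedged sketch, assuming base_config.Config's override() accepts a nested dict (as the Model Garden's --params_override plumbing does):

    from official.core.config_definitions import ExperimentConfig

    config = ExperimentConfig()
    # Roughly equivalent to
    # --params_override=runtime.tpu_enable_xla_dynamic_padder=false
    config.override({"runtime": {"tpu_enable_xla_dynamic_padder": False}})
    assert config.runtime.tpu_enable_xla_dynamic_padder is False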
official/modeling/progressive/trainer.py

@@ -92,6 +92,7 @@ class ProgressiveTrainer(trainer_lib.Trainer):
     # it gets a single-replica no-op strategy.
     self._strategy = tf.distribute.get_strategy()
     self._config = config
+    self._runtime_options = trainer_lib.get_runtime_options(config)
     self._task = prog_task
 
     # Directory for non-progressive checkpoint