ModelZoo / ResNet50_tensorflow · Commits

Commit e11010fc
Authored Aug 23, 2017 by Eli Bixby
Parent: 5cb2dbde

    Use HParams rather than dict. Don't tune sync

Showing 1 changed file with 19 additions and 16 deletions.

tutorials/image/cifar10_estimator/cifar10_main.py  (+19 -16)
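The change this commit makes is easiest to see side by side. Below is a minimal sketch of the two hyperparameter access patterns, assuming the TF 1.x tf.contrib.training.HParams class used elsewhere in this file; the values are illustrative, not taken from the diff:

import tensorflow as tf

# Before this commit: hyperparameters in a plain dict, read by string key.
params = {'weight_decay': 2e-4, 'momentum': 0.9}
weight_decay = params['weight_decay']

# After: the same values held in an HParams object and read as attributes.
# HParams also supports type-checked updates (set_hparam) and string
# overrides, e.g. hparams.parse('weight_decay=1e-4').
hparams = tf.contrib.training.HParams(weight_decay=2e-4, momentum=0.9)
weight_decay = hparams.weight_decay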
@@ -29,7 +29,6 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
-import collections
 import functools
 import itertools
 import os
@@ -47,7 +46,7 @@ import cifar10_utils
 tf.logging.set_verbosity(tf.logging.INFO)
 
 
-def get_model_fn(num_gpus, variable_strategy, num_workers):
+def get_model_fn(num_gpus, variable_strategy, num_workers, sync):
   def _resnet_model_fn(features, labels, mode, params):
     """Resnet model body.
@@ -61,13 +60,13 @@ def get_model_fn(num_gpus, variable_strategy, num_workers):
       features: a list of tensors, one for each tower
       labels: a list of tensors, one for each tower
       mode: ModeKeys.TRAIN or EVAL
-      params: Dictionary of Hyperparameters suitable for tuning
+      params: Hyperparameters suitable for tuning
     Returns:
       A EstimatorSpec object.
     """
     is_training = (mode == tf.estimator.ModeKeys.TRAIN)
-    weight_decay = params['weight_decay']
-    momentum = params['momentum']
+    weight_decay = params.weight_decay
+    momentum = params.momentum
 
     tower_features = features
     tower_labels = labels
@@ -105,9 +104,9 @@ def get_model_fn(num_gpus, variable_strategy, num_workers):
             tower_features[i],
             tower_labels[i],
             (device_type == 'cpu'),
-            params['num_layers'],
-            params['batch_norm_decay'],
-            params['batch_norm_epsilon'])
+            params.num_layers,
+            params.batch_norm_decay,
+            params.batch_norm_epsilon)
         tower_losses.append(loss)
         tower_gradvars.append(gradvars)
         tower_preds.append(preds)
@@ -144,12 +143,12 @@ def get_model_fn(num_gpus, variable_strategy, num_workers):
       # Suggested learning rate scheduling from
       # https://github.com/ppwwyyxx/tensorpack/blob/master/examples/ResNet/cifar10-resnet.py#L155
       num_batches_per_epoch = cifar10.Cifar10DataSet.num_examples_per_epoch(
-          'train') // (params['train_batch_size'] * num_workers)
+          'train') // (params.train_batch_size * num_workers)
       boundaries = [
           num_batches_per_epoch * x
           for x in np.array([82, 123, 300], dtype=np.int64)
       ]
-      staged_lr = [params['learning_rate'] * x for x in [1, 0.1, 0.01, 0.002]]
+      staged_lr = [params.learning_rate * x for x in [1, 0.1, 0.01, 0.002]]
 
       learning_rate = tf.train.piecewise_constant(tf.train.get_global_step(),
                                                   boundaries, staged_lr)
@@ -160,7 +159,7 @@ def get_model_fn(num_gpus, variable_strategy, num_workers):
           learning_rate=learning_rate, momentum=momentum)
 
       chief_hooks = []
-      if params['sync']:
+      if sync:
         optimizer = tf.train.SyncReplicasOptimizer(
             optimizer, replicas_to_aggregate=num_workers)
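For context, this is roughly what the sync branch above sets up. A sketch under TF 1.x assumptions: num_workers is illustrative, and the chief-hook wiring is inferred from the chief_hooks list visible in the hunk, not shown in the diff itself:

import tensorflow as tf

num_workers = 2  # illustrative; the real value comes from the cluster config
optimizer = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9)

# SyncReplicasOptimizer wraps the base optimizer so each global step applies
# one update built from gradients aggregated across `replicas_to_aggregate`
# workers, instead of letting every worker apply updates asynchronously.
optimizer = tf.train.SyncReplicasOptimizer(
    optimizer, replicas_to_aggregate=num_workers)

# The wrapper needs a session hook on the chief to initialize its aggregation
# queues; presumably this is what gets appended to chief_hooks above.
chief_hooks = [optimizer.make_session_run_hook(is_chief=True)]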
@@ -279,7 +278,8 @@ def input_fn(data_dir, subset, num_shards, batch_size,
 # create experiment
 def get_experiment_fn(data_dir, num_gpus, is_gpu_ps,
-                      use_distortion_for_training=True):
+                      use_distortion_for_training=True,
+                      sync=True):
   """Returns an Experiment function.
 
   Experiments perform training on several workers in parallel,
@@ -293,6 +293,7 @@ def get_experiment_fn(data_dir, num_gpus, is_gpu_ps,
     num_gpus: int. Number of GPUs on each worker.
     is_gpu_ps: bool. If true, average gradients on GPUs.
     use_distortion_for_training: bool. See cifar10.Cifar10DataSet.
+    sync: bool. If true synchronizes variable updates across workers.
 
   Returns:
     A function (tf.estimator.RunConfig, tf.contrib.training.HParams) ->
       tf.contrib.learn.Experiment.
@@ -340,9 +341,9 @@ def get_experiment_fn(data_dir, num_gpus, is_gpu_ps,
   classifier = tf.estimator.Estimator(
       model_fn=get_model_fn(
-          num_gpus, is_gpu_ps, run_config.num_worker_replicas or 1),
+          num_gpus, is_gpu_ps, run_config.num_worker_replicas or 1, sync),
       config=run_config,
-      params=vars(hparams))
+      params=hparams)
 
   # Create experiment.
@@ -365,6 +366,7 @@ def main(job_dir,
          use_distortion_for_training,
          log_device_placement,
          num_intra_threads,
+         sync,
          **hparams):
 
   # The env variable is on deprecation path, default is set to off.
   os.environ['TF_SYNC_ON_FINISH'] = '0'
@@ -387,7 +389,8 @@ def main(job_dir,
       get_experiment_fn(data_dir,
                         num_gpus,
                         variable_strategy,
-                        use_distortion_for_training),
+                        use_distortion_for_training,
+                        sync),
       run_config=config,
       hparams=tf.contrib.training.HParams(**hparams))
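The plumbing implied by main(..., sync, **hparams) together with hparams=tf.contrib.training.HParams(**hparams) is that flags consumed by name (like --sync) stay ordinary arguments, while everything left in **hparams becomes a tunable hyperparameter. A sketch, with flag names taken from the diff but the --sync action and defaults illustrative:

import argparse
import tensorflow as tf

parser = argparse.ArgumentParser()
parser.add_argument('--sync', action='store_true')               # consumed by main()
parser.add_argument('--weight-decay', type=float, default=2e-4)  # hyperparameter
parser.add_argument('--learning-rate', type=float, default=0.1)  # hyperparameter
args = vars(parser.parse_args())

sync = args.pop('sync')  # no longer part of the tunable hyperparameters
hparams = tf.contrib.training.HParams(**args)  # weight_decay, learning_rate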
@@ -456,7 +459,7 @@ if __name__ == '__main__':
       type=float,
       default=2e-4,
-      help='Weight decay for convolutions.'
-  )
+      help='Weight decay for convolutions.')
   parser.add_argument(
       '--learning-rate',
       type=float,