Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
74ecc048
Commit
74ecc048
authored
Jul 24, 2017
by
Marianne Linhares Monteiro
Committed by
GitHub
Jul 24, 2017
Browse files
Refactoring and adding sync mode
parent
28328ae3
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
72 additions
and
98 deletions
+72
-98
tutorials/image/cifar10_estimator/cifar10_main.py
tutorials/image/cifar10_estimator/cifar10_main.py
+72
-98
No files found.
tutorials/image/cifar10_estimator/cifar10_main.py
View file @
74ecc048
...
@@ -36,6 +36,7 @@ import os
...
@@ -36,6 +36,7 @@ import os
import
numpy
as
np
import
numpy
as
np
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
import
tensorflow
as
tf
import
tensorflow
as
tf
from
tensorflow.contrib.learn.python.learn
import
learn_runner
# run the experiment
import
cifar10
import
cifar10
import
cifar10_model
import
cifar10_model
...
@@ -44,13 +45,13 @@ tf.logging.set_verbosity(tf.logging.INFO)
...
@@ -44,13 +45,13 @@ tf.logging.set_verbosity(tf.logging.INFO)
FLAGS
=
tf
.
flags
.
FLAGS
FLAGS
=
tf
.
flags
.
FLAGS
tf
.
flags
.
DEFINE_string
(
'data_dir'
,
''
,
tf
.
flags
.
DEFINE_string
(
'data_dir'
,
'
cifar10
'
,
'The directory where the CIFAR-10 input data is stored.'
)
'The directory where the CIFAR-10 input data is stored.'
)
tf
.
flags
.
DEFINE_string
(
'model_dir'
,
''
,
tf
.
flags
.
DEFINE_string
(
'model_dir'
,
'
output2_2
'
,
'The directory where the model will be stored.'
)
'The directory where the model will be stored.'
)
tf
.
flags
.
DEFINE_boolean
(
'is_cpu_ps'
,
Fals
e
,
tf
.
flags
.
DEFINE_boolean
(
'is_cpu_ps'
,
Tru
e
,
'If using CPU as the parameter server.'
)
'If using CPU as the parameter server.'
)
tf
.
flags
.
DEFINE_integer
(
'num_gpus'
,
1
,
tf
.
flags
.
DEFINE_integer
(
'num_gpus'
,
1
,
...
@@ -58,12 +59,12 @@ tf.flags.DEFINE_integer('num_gpus', 1,
...
@@ -58,12 +59,12 @@ tf.flags.DEFINE_integer('num_gpus', 1,
tf
.
flags
.
DEFINE_integer
(
'num_layers'
,
44
,
'The number of layers of the model.'
)
tf
.
flags
.
DEFINE_integer
(
'num_layers'
,
44
,
'The number of layers of the model.'
)
tf
.
flags
.
DEFINE_integer
(
'train_steps'
,
10000
,
tf
.
flags
.
DEFINE_integer
(
'train_batch_size'
,
1024
,
'Batch size for training.'
)
'The number of steps to use for training.'
)
tf
.
flags
.
DEFINE_integer
(
'train_batch_size'
,
128
,
'Batch size for training.'
)
tf
.
flags
.
DEFINE_integer
(
'train_steps'
,
(
50000.0
/
FLAGS
.
train_batch_size
)
*
40
,
'The number of steps to use for training.'
)
# 40 epochs
tf
.
flags
.
DEFINE_integer
(
'eval_batch_size'
,
1
00
,
'Batch size for validation.'
)
tf
.
flags
.
DEFINE_integer
(
'eval_batch_size'
,
2
00
,
'Batch size for validation.'
)
tf
.
flags
.
DEFINE_float
(
'momentum'
,
0.9
,
'Momentum for MomentumOptimizer.'
)
tf
.
flags
.
DEFINE_float
(
'momentum'
,
0.9
,
'Momentum for MomentumOptimizer.'
)
...
@@ -71,10 +72,6 @@ tf.flags.DEFINE_float('weight_decay', 1e-4, 'Weight decay for convolutions.')
...
@@ -71,10 +72,6 @@ tf.flags.DEFINE_float('weight_decay', 1e-4, 'Weight decay for convolutions.')
tf
.
flags
.
DEFINE_boolean
(
'use_distortion_for_training'
,
True
,
tf
.
flags
.
DEFINE_boolean
(
'use_distortion_for_training'
,
True
,
'If doing image distortion for training.'
)
'If doing image distortion for training.'
)
tf
.
flags
.
DEFINE_boolean
(
'run_experiment'
,
False
,
'If True will run an experiment,'
'otherwise will run training and evaluation'
'using the estimator interface'
)
# Perf flags
# Perf flags
tf
.
flags
.
DEFINE_integer
(
'num_intra_threads'
,
1
,
tf
.
flags
.
DEFINE_integer
(
'num_intra_threads'
,
1
,
...
@@ -141,7 +138,6 @@ def _create_device_setter(is_cpu_ps, worker):
...
@@ -141,7 +138,6 @@ def _create_device_setter(is_cpu_ps, worker):
gpus
=
[
'/gpu:%d'
%
i
for
i
in
range
(
FLAGS
.
num_gpus
)]
gpus
=
[
'/gpu:%d'
%
i
for
i
in
range
(
FLAGS
.
num_gpus
)]
return
ParamServerDeviceSetter
(
worker
,
gpus
)
return
ParamServerDeviceSetter
(
worker
,
gpus
)
def
_resnet_model_fn
(
features
,
labels
,
mode
):
def
_resnet_model_fn
(
features
,
labels
,
mode
):
"""Resnet model body.
"""Resnet model body.
...
@@ -224,18 +220,19 @@ def _resnet_model_fn(features, labels, mode):
...
@@ -224,18 +220,19 @@ def _resnet_model_fn(features, labels, mode):
for
x
in
np
.
array
([
82
,
123
,
300
],
dtype
=
np
.
int64
)
for
x
in
np
.
array
([
82
,
123
,
300
],
dtype
=
np
.
int64
)
]
]
staged_lr
=
[
0.1
,
0.01
,
0.001
,
0.0002
]
staged_lr
=
[
0.1
,
0.01
,
0.001
,
0.0002
]
learning_rate
=
tf
.
train
.
piecewise_constant
(
tf
.
train
.
get_global_step
(),
global_step
=
tf
.
train
.
get_global_step
()
learning_rate
=
tf
.
train
.
piecewise_constant
(
global_step
,
boundaries
,
staged_lr
)
boundaries
,
staged_lr
)
# Create a nicely-named tensor for logging
# Create a nicely-named tensor for logging
learning_rate
=
tf
.
identity
(
learning_rate
,
name
=
'learning_rate'
)
learning_rate
=
tf
.
identity
(
learning_rate
,
name
=
'learning_rate'
)
optimizer
=
tf
.
train
.
MomentumOptimizer
(
optimizer
=
tf
.
train
.
MomentumOptimizer
(
learning_rate
=
learning_rate
,
learning_rate
=
learning_rate
,
momentum
=
momentum
)
momentum
=
momentum
)
# Create single grouped train op
# Create single grouped train op
train_op
=
[
train_op
=
[
optimizer
.
apply_gradients
(
optimizer
.
apply_gradients
(
gradvars
,
global_step
=
tf
.
train
.
get_
global_step
()
)
gradvars
,
global_step
=
global_step
)
]
]
train_op
.
extend
(
update_ops
)
train_op
.
extend
(
update_ops
)
train_op
=
tf
.
group
(
*
train_op
)
train_op
=
tf
.
group
(
*
train_op
)
...
@@ -363,23 +360,21 @@ def input_fn(subset, num_shards):
...
@@ -363,23 +360,21 @@ def input_fn(subset, num_shards):
label_shards
=
[
tf
.
parallel_stack
(
x
)
for
x
in
label_shards
]
label_shards
=
[
tf
.
parallel_stack
(
x
)
for
x
in
label_shards
]
return
feature_shards
,
label_shards
return
feature_shards
,
label_shards
def
create_experiment_fn
(
train_input
,
test_input
,
hooks
):
# create experiment
def
get_experiment_fn
(
train_input_fn
,
eval_input_fn
,
train_steps
,
eval_steps
):
def
_experiment_fn
(
run_config
,
hparams
):
def
_experiment_fn
(
run_config
,
hparams
):
del
hparams
# unused arg
estimator
=
tf
.
estimator
.
Estimator
(
model_fn
=
_resnet_model_fn
,
# create estimator
config
=
run_config
,
classifier
=
tf
.
estimator
.
Estimator
(
model_fn
=
_resnet_model_fn
,
model_dir
=
FLAGS
.
model_dir
)
config
=
run_config
)
experiment
=
tf
.
contrib
.
learn
.
Experiment
(
return
tf
.
contrib
.
learn
.
Experiment
(
estimator
,
classifier
,
train_input_fn
=
train_input
,
train_input_fn
=
train_input_fn
,
eval_input_fn
=
test_input
,
eval_input_fn
=
eval_input_fn
,
train_steps
=
FLAGS
.
train_steps
)
train_steps
=
train_steps
,
eval_steps
=
eval_steps
experiment
.
extend_train_hooks
(
hooks
)
)
return
experiment
return
_experiment_fn
return
_experiment_fn
def
main
(
unused_argv
):
def
main
(
unused_argv
):
# The env variable is on deprecation path, default is set to off.
# The env variable is on deprecation path, default is set to off.
...
@@ -411,38 +406,17 @@ def main(unused_argv):
...
@@ -411,38 +406,17 @@ def main(unused_argv):
sess_config
.
gpu_options
.
force_gpu_compatible
=
FLAGS
.
force_gpu_compatible
sess_config
.
gpu_options
.
force_gpu_compatible
=
FLAGS
.
force_gpu_compatible
config
=
config
.
replace
(
session_config
=
sess_config
)
config
=
config
.
replace
(
session_config
=
sess_config
)
train_input_fn
=
functools
.
partial
(
input_fn
,
subset
=
'train'
,
train_input
=
functools
.
partial
(
input_fn
,
subset
=
'train'
,
num_shards
=
FLAGS
.
num_gpus
)
num_shards
=
FLAGS
.
num_gpus
)
test_input
=
functools
.
partial
(
input_fn
,
subset
=
'eval'
,
num_shards
=
FLAGS
.
num_gpus
)
eval_input_fn
=
functools
.
partial
(
input_fn
,
subset
=
'eval'
,
num_shards
=
FLAGS
.
num_gpus
)
train_steps
=
FLAGS
.
train_steps
eval_steps
=
num_eval_examples
//
FLAGS
.
eval_batch_size
if
FLAGS
.
run_experiment
:
tf
.
contrib
.
learn
.
learn_runner
.
run
(
get_experiment_fn
(
train_input_fn
,
eval_input_fn
,
train_steps
,
eval_steps
),
run_config
=
config
)
else
:
classifier
=
tf
.
estimator
.
Estimator
(
model_fn
=
_resnet_model_fn
,
config
=
config
)
tensors_to_log
=
{
'learning_rate'
:
'learning_rate'
}
tensors_to_log
=
{
'learning_rate'
:
'learning_rate'
}
logging_hook
=
tf
.
train
.
LoggingTensorHook
(
logging_hook
=
tf
.
train
.
LoggingTensorHook
(
tensors
=
tensors_to_log
,
every_n_iter
=
100
)
tensors
=
tensors_to_log
,
every_n_iter
=
100
)
print
(
'Starting to train...'
)
hooks
=
[
logging_hook
]
classifier
.
train
(
input_fn
=
train_input_fn
,
steps
=
train_steps
,
hooks
=
[
logging_hook
])
print
(
'Starting to evaluate...'
)
eval_results
=
classifier
.
evaluate
(
input_fn
=
eval_input_fn
,
steps
=
eval_steps
)
print
(
eval_results
)
# run experiment
learn_runner
.
run
(
create_experiment_fn
(
train_input
,
test_input
,
hooks
),
run_config
=
config
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tf
.
app
.
run
()
tf
.
app
.
run
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment