ModelZoo / ResNet50_tensorflow · Commits · a66d4713

Unverified commit a66d4713, authored Jan 31, 2019 by guptapriya, committed by GitHub on Jan 31, 2019.
Use core mirrored strategy in official models (#6126)
parent 2519f29b

Showing 4 changed files with 14 additions and 18 deletions (+14 -18)
official/resnet/keras/keras_cifar_main.py     +2 -5
official/resnet/keras/keras_common.py         +3 -2
official/resnet/keras/keras_imagenet_main.py  +2 -5
official/utils/misc/distribution_utils.py     +7 -6
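At its heart, this commit swaps the experimental tf.contrib.distribute.MirroredStrategy for the core tf.distribute.MirroredStrategy, whose constructor takes an explicit device list rather than a GPU count. A minimal sketch of the API difference, assuming the TF 1.13-era signatures this code targets (num_gpus here is an illustrative value, not the repo's flag plumbing):

import tensorflow as tf

num_gpus = 2  # illustrative; the real code reads this from flags
devices = ["device:GPU:%d" % i for i in range(num_gpus)]

# Before this commit (contrib API, sized by GPU count):
# strategy = tf.contrib.distribute.MirroredStrategy(num_gpus=num_gpus)

# After this commit (core API, explicit device list):
strategy = tf.distribute.MirroredStrategy(devices=devices)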
official/resnet/keras/keras_cifar_main.py (view file @ a66d4713)
...
@@ -105,9 +105,6 @@ def run(flags_obj):
     raise ValueError('dtype fp16 is not supported in Keras. Use the default '
                      'value(fp32).')
 
-  per_device_batch_size = distribution_utils.per_device_batch_size(
-      flags_obj.batch_size, flags_core.get_num_gpus(flags_obj))
-
   data_format = flags_obj.data_format
   if data_format is None:
     data_format = ('channels_first'
...
@@ -127,14 +124,14 @@ def run(flags_obj):
   train_input_dataset = input_fn(
       is_training=True,
       data_dir=flags_obj.data_dir,
-      batch_size=per_device_batch_size,
+      batch_size=flags_obj.batch_size,
       num_epochs=flags_obj.train_epochs,
       parse_record_fn=parse_record_keras)
 
   eval_input_dataset = input_fn(
       is_training=False,
       data_dir=flags_obj.data_dir,
-      batch_size=per_device_batch_size,
+      batch_size=flags_obj.batch_size,
       num_epochs=flags_obj.train_epochs,
       parse_record_fn=parse_record_keras)
...
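The batch_size edit is a direct consequence of the strategy swap: the core MirroredStrategy, when driving a Keras model, splits the global batch across replicas itself, so input_fn is now handed the undivided flags_obj.batch_size. A hedged sketch of the arithmetic involved (values illustrative; in the TF 1.x code of this era the strategy is attached to the model at compile time):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

global_batch_size = 128  # what flags_obj.batch_size now means
# Keras divides the global batch across replicas; this script no longer does.
per_replica = global_batch_size // strategy.num_replicas_in_sync
print(per_replica)  # e.g. 64 on a 2-GPU machine, 128 on CPU only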
official/resnet/keras/keras_common.py (view file @ a66d4713)
...
@@ -227,19 +227,20 @@ def get_synth_input_fn(height, width, num_channels, num_classes,
     """Returns dataset filled with random data."""
     # Synthetic input should be within [0, 255].
-    inputs = tf.truncated_normal([batch_size] + [height, width, num_channels],
+    inputs = tf.truncated_normal([height, width, num_channels],
                                  dtype=dtype,
                                  mean=127,
                                  stddev=60,
                                  name='synthetic_inputs')
-    labels = tf.random_uniform([batch_size] + [1],
+    labels = tf.random_uniform([1],
                                minval=0,
                                maxval=num_classes - 1,
                                dtype=tf.int32,
                                name='synthetic_labels')
     data = tf.data.Dataset.from_tensors((inputs, labels)).repeat()
+    data = data.batch(batch_size)
     data = data.prefetch(buffer_size=tf.contrib.data.AUTOTUNE)
     return data
...
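Read together, the three changed lines move batching out of the tensor shapes and into the dataset: one unbatched synthetic example is created, repeated, and then batched with the (now global) batch_size. A self-contained sketch under the TF 1.x API this file uses, with illustrative dimensions:

import tensorflow as tf

height, width, num_channels, num_classes = 32, 32, 3, 10
batch_size = 128  # global batch size

# A single unbatched example of random data, roughly within [0, 255].
inputs = tf.truncated_normal([height, width, num_channels], dtype=tf.float32,
                             mean=127, stddev=60, name='synthetic_inputs')
labels = tf.random_uniform([1], minval=0, maxval=num_classes - 1,
                           dtype=tf.int32, name='synthetic_labels')

# Repeat the example and let the dataset batch it: elements now have shape
# [batch_size, height, width, num_channels], which the strategy can split.
data = tf.data.Dataset.from_tensors((inputs, labels)).repeat()
data = data.batch(batch_size)

One subtlety: because the repeated element reuses a single random draw, every example in a batch is identical, which is fine for the throughput benchmarking this helper exists for but not for convergence testing.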
official/resnet/keras/keras_imagenet_main.py (view file @ a66d4713)
...
@@ -101,9 +101,6 @@ def run(flags_obj):
                    if tf.test.is_built_with_cuda() else 'channels_last')
   tf.keras.backend.set_image_data_format(data_format)
 
-  per_device_batch_size = distribution_utils.per_device_batch_size(
-      flags_obj.batch_size, flags_core.get_num_gpus(flags_obj))
-
   # pylint: disable=protected-access
   if flags_obj.use_synthetic_data:
     input_fn = keras_common.get_synth_input_fn(
...
@@ -117,13 +114,13 @@ def run(flags_obj):
   train_input_dataset = input_fn(is_training=True,
                                  data_dir=flags_obj.data_dir,
-                                 batch_size=per_device_batch_size,
+                                 batch_size=flags_obj.batch_size,
                                  num_epochs=flags_obj.train_epochs,
                                  parse_record_fn=parse_record_keras)
 
   eval_input_dataset = input_fn(is_training=False,
                                 data_dir=flags_obj.data_dir,
-                                batch_size=per_device_batch_size,
+                                batch_size=flags_obj.batch_size,
                                 num_epochs=flags_obj.train_epochs,
                                 parse_record_fn=parse_record_keras)
...
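The per_device_batch_size call removed above still matters on the Estimator side, which is why the helper survives in distribution_utils.py below. A hedged sketch of its divisibility contract (named _sketch because the real implementation may differ in details):

def per_device_batch_size_sketch(batch_size, num_gpus):
    """Divides a global batch size evenly across GPUs (Estimator path only)."""
    if num_gpus <= 1:
        return batch_size
    remainder = batch_size % num_gpus
    if remainder:
        raise ValueError(
            'Batch size must be a multiple of the number of GPUs: got '
            'batch_size=%d with num_gpus=%d.' % (batch_size, num_gpus))
    return batch_size // num_gpus

assert per_device_batch_size_sketch(128, 2) == 64  # 64 examples per device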
official/utils/misc/distribution_utils.py (view file @ a66d4713)
...
@@ -57,21 +57,22 @@ def get_distribution_strategy(num_gpus,
           "turn_off_distribution_strategy flag cannot be set to"
           "True.".format(num_gpus))
   else:  # num_gpus > 1 and not turn_off_distribution_strategy
+    devices = ["device:GPU:%d" % i for i in range(num_gpus)]
     if all_reduce_alg:
-      return tf.contrib.distribute.MirroredStrategy(
-          num_gpus=num_gpus,
+      return tf.distribute.MirroredStrategy(
+          devices=devices,
           cross_device_ops=tf.contrib.distribute.AllReduceCrossDeviceOps(
               all_reduce_alg, num_packs=2))
     else:
-      return tf.contrib.distribute.MirroredStrategy(num_gpus=num_gpus)
+      return tf.distribute.MirroredStrategy(devices=devices)
 
 
 def per_device_batch_size(batch_size, num_gpus):
   """For multi-gpu, batch-size must be a multiple of the number of GPUs.
 
-  Note that this should eventually be handled by DistributionStrategies
-  directly. Multi-GPU support is currently experimental, however,
-  so doing the work here until that feature is in place.
+  Note that distribution strategy handles this automatically when used with
+  Keras. For using with Estimator, we need to get per GPU batch.
 
   Args:
     batch_size: Global batch size to be divided among devices. This should be
...
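Per the rewritten docstring, Keras callers can now pass the global batch size straight through, while Estimator callers still divide it per GPU. A hedged sketch of how a strategy returned by get_distribution_strategy is typically wired into an Estimator in TF 1.13 (only the num_gpus parameter is taken from this diff; the model_fn is a hypothetical placeholder):

import tensorflow as tf
from official.utils.misc import distribution_utils

strategy = distribution_utils.get_distribution_strategy(num_gpus=2)
run_config = tf.estimator.RunConfig(train_distribute=strategy)
# estimator = tf.estimator.Estimator(model_fn=my_model_fn, config=run_config)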