ModelZoo / ResNet50_tensorflow · Commits · 9a88e415

Commit 9a88e415, authored Sep 03, 2019 by Hongkun Yu,
committed by A. Unique TensorFlower on Sep 03, 2019

Internal change

PiperOrigin-RevId: 267007907

Parent: bd211e3e
Showing 4 changed files with 60 additions and 129 deletions (+60 / -129):

    official/resnet/ctl/ctl_imagenet_main.py                        +1   -2
    official/vision/image_classification/common.py                  +41  -3
    official/vision/image_classification/resnet_imagenet_main.py    +15  -42
    official/vision/image_classification/resnet_imagenet_test.py    +3   -82
official/resnet/ctl/ctl_imagenet_main.py (view file @ 9a88e415)
...
...
@@ -26,7 +26,6 @@ import tensorflow as tf
 from official.resnet.ctl import ctl_common
 from official.vision.image_classification import imagenet_preprocessing
 from official.vision.image_classification import common
-from official.vision.image_classification import resnet_imagenet_main
 from official.vision.image_classification import resnet_model
 from official.utils.flags import core as flags_core
 from official.utils.logs import logger
...
...
@@ -246,7 +245,7 @@ def run(flags_obj):
       training_accuracy.reset_states()

       for step in range(train_steps):
-        optimizer.lr = resnet_imagenet_main.learning_rate_schedule(
+        optimizer.lr = common.learning_rate_schedule(
             epoch, step, train_steps, flags_obj.batch_size)
         time_callback.on_batch_begin(step + epoch * train_steps)
...
...
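For readers skimming the hunk above: assigning optimizer.lr inside the step loop is how this custom training loop (CTL) applies a per-step schedule. A self-contained sketch of the pattern, using a tiny stand-in model and an inline warmup-only schedule rather than the repo's actual code:

    import tensorflow as tf

    def learning_rate_schedule(epoch, step, steps_per_epoch, batch_size):
      # Stand-in with the same shape as common.learning_rate_schedule:
      # linear scaling rule plus a 5-epoch linear warmup.
      initial_lr = 0.1 * batch_size / 256
      progress = epoch + float(step) / steps_per_epoch
      return initial_lr * min(progress / 5.0, 1.0)

    model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.SGD(momentum=0.9)
    images = tf.random.normal([8, 32])
    labels = tf.random.uniform([8], maxval=10, dtype=tf.int32)

    train_epochs, train_steps, batch_size = 2, 4, 256
    for epoch in range(train_epochs):
      for step in range(train_steps):
        # Recomputed every step; assignment updates the optimizer's LR variable.
        optimizer.lr = learning_rate_schedule(epoch, step, train_steps, batch_size)
        with tf.GradientTape() as tape:
          loss = loss_fn(labels, model(images, training=True))
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))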
official/vision/image_classification/common.py (view file @ 9a88e415)
...
...
@@ -31,6 +31,41 @@ from official.utils.misc import keras_utils
 FLAGS = flags.FLAGS

+BASE_LEARNING_RATE = 0.1  # This matches Jing's version.
+TRAIN_TOP_1 = 'training_accuracy_top_1'
+LR_SCHEDULE = [    # (multiplier, epoch to start) tuples
+    (1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)
+]
+
+
+def learning_rate_schedule(current_epoch,
+                           current_batch,
+                           batches_per_epoch,
+                           batch_size):
+  """Handles linear scaling rule, gradual warmup, and LR decay.
+
+  Scale learning rate at epoch boundaries provided in LR_SCHEDULE by the
+  provided scaling factor.
+
+  Args:
+    current_epoch: integer, current epoch indexed from 0.
+    current_batch: integer, current batch in the current epoch, indexed from 0.
+    batches_per_epoch: integer, number of steps in an epoch.
+    batch_size: integer, total batch size.
+
+  Returns:
+    Adjusted learning rate.
+  """
+  initial_lr = BASE_LEARNING_RATE * batch_size / 256
+  epoch = current_epoch + float(current_batch) / batches_per_epoch
+  warmup_lr_multiplier, warmup_end_epoch = LR_SCHEDULE[0]
+  if epoch < warmup_end_epoch:
+    # Learning rate increases linearly per step.
+    return initial_lr * warmup_lr_multiplier * epoch / warmup_end_epoch
+  for mult, start_epoch in LR_SCHEDULE:
+    if epoch >= start_epoch:
+      learning_rate = initial_lr * mult
+    else:
+      break
+  return learning_rate
+
+
 class LearningRateBatchScheduler(tf.keras.callbacks.Callback):
...
...
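To make the new schedule concrete, a few hand-computed values, assuming batch_size=256 so that initial_lr = 0.1:

    learning_rate_schedule(2, 0, 100, 256)   # warmup: 0.1 * 1.0 * 2.0 / 5 = 0.04
    learning_rate_schedule(10, 0, 100, 256)  # past warmup, multiplier 1.0 -> 0.1
    learning_rate_schedule(45, 0, 100, 256)  # epoch >= 30, multiplier 0.1 -> 0.01
    learning_rate_schedule(85, 0, 100, 256)  # epoch >= 80, multiplier 0.001 -> 1e-4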
@@ -172,12 +207,13 @@ def get_optimizer(learning_rate=0.1):
   return gradient_descent_v2.SGD(learning_rate=learning_rate, momentum=0.9)


-def get_callbacks(learning_rate_schedule_fn, num_images):
+# TODO(hongkuny,haoyuzhang): make cifar model use_tensor_lr to clean up code.
+def get_callbacks(learning_rate_schedule_fn=None, num_images=None):
   """Returns common callbacks."""
   time_callback = keras_utils.TimeHistory(FLAGS.batch_size, FLAGS.log_steps)

   callbacks = [time_callback]

-  if not FLAGS.use_tensor_lr:
+  if not FLAGS.use_tensor_lr and learning_rate_schedule_fn:
     lr_callback = LearningRateBatchScheduler(
         learning_rate_schedule_fn,
         batch_size=FLAGS.batch_size,
...
...
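With both parameters now defaulting to None, callers on the tensor-LR path can build the callback list without supplying a schedule at all. A sketch of the two call patterns the loosened signature permits (the num_images value is ImageNet's training-set size, as used elsewhere in this commit):

    # Tensor-LR path: no per-batch scheduler callback is attached.
    callbacks = common.get_callbacks()

    # Callback-driven path: LearningRateBatchScheduler is attached when
    # FLAGS.use_tensor_lr is false.
    callbacks = common.get_callbacks(common.learning_rate_schedule,
                                     num_images=1281167)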
@@ -312,6 +348,9 @@ def define_keras_flags(dynamic_loss_scale=True):
   flags.DEFINE_boolean(
       name='enable_get_next_as_optional', default=False,
       help='Enable get_next_as_optional behavior in DistributedIterator.')
+  flags.DEFINE_boolean(
+      name='enable_checkpoint_and_export', default=False,
+      help='Whether to enable a checkpoint callback and export the savedmodel.')


 def get_synth_input_fn(height, width, num_channels, num_classes,
...
...
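The new flag is a standard absl boolean, so it can be toggled from the command line; an illustrative invocation (model_dir and epoch count are placeholders):

    python resnet_imagenet_main.py --enable_checkpoint_and_export=true \
        --model_dir=/tmp/resnet50 --train_epochs=90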
@@ -346,7 +385,6 @@ def get_synth_input_fn(height, width, num_channels, num_classes,
       mean=127,
       stddev=60,
       name='synthetic_inputs')
-
   labels = tf.random.uniform([1],
                              minval=0,
                              maxval=num_classes - 1,
...
...
official/vision/image_classification/resnet_imagenet_main.py (view file @ 9a88e415)
...
...
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import os
+
 from absl import app
 from absl import flags
 from absl import logging
...
...
@@ -33,42 +35,6 @@ from official.vision.image_classification import common
 from official.vision.image_classification import imagenet_preprocessing
 from official.vision.image_classification import resnet_model

-LR_SCHEDULE = [    # (multiplier, epoch to start) tuples
-    (1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)
-]
-
-
-def learning_rate_schedule(current_epoch,
-                           current_batch,
-                           batches_per_epoch,
-                           batch_size):
-  """Handles linear scaling rule, gradual warmup, and LR decay.
-
-  Scale learning rate at epoch boundaries provided in LR_SCHEDULE by the
-  provided scaling factor.
-
-  Args:
-    current_epoch: integer, current epoch indexed from 0.
-    current_batch: integer, current batch in the current epoch, indexed from 0.
-    batches_per_epoch: integer, number of steps in an epoch.
-    batch_size: integer, total batch sized.
-
-  Returns:
-    Adjusted learning rate.
-  """
-  initial_lr = common.BASE_LEARNING_RATE * batch_size / 256
-  epoch = current_epoch + float(current_batch) / batches_per_epoch
-  warmup_lr_multiplier, warmup_end_epoch = LR_SCHEDULE[0]
-  if epoch < warmup_end_epoch:
-    # Learning rate increases linearly per step.
-    return initial_lr * warmup_lr_multiplier * epoch / warmup_end_epoch
-  for mult, start_epoch in LR_SCHEDULE:
-    if epoch >= start_epoch:
-      learning_rate = initial_lr * mult
-    else:
-      break
-  return learning_rate
-
-
 def run(flags_obj):
   """Run ResNet ImageNet training and eval loop using native Keras APIs.
...
...
@@ -94,7 +60,7 @@ def run(flags_obj):
   common.set_cudnn_batchnorm_mode()

   dtype = flags_core.get_tf_dtype(flags_obj)
-  if dtype == 'float16':
+  if dtype == tf.float16:
     loss_scale = flags_core.get_loss_scale(flags_obj, default_for_fp16=128)
     policy = tf.compat.v2.keras.mixed_precision.experimental.Policy(
         'mixed_float16', loss_scale=loss_scale)
...
...
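As context for the dtype check fixed above, a minimal sketch of enabling the loss-scaled mixed-precision policy that this branch constructs, assuming the TF 1.x/2.0-era experimental API used in this file:

    import tensorflow as tf

    # Sketch only: mirrors the experimental mixed-precision API in this hunk.
    loss_scale = 128  # matches default_for_fp16 above
    policy = tf.compat.v2.keras.mixed_precision.experimental.Policy(
        'mixed_float16', loss_scale=loss_scale)
    tf.compat.v2.keras.mixed_precision.experimental.set_policy(policy)
    # Layers built after this point compute in float16 but keep float32 variables.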
@@ -175,9 +141,9 @@ def run(flags_obj):
     lr_schedule = common.PiecewiseConstantDecayWithWarmup(
         batch_size=flags_obj.batch_size,
         epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
-        warmup_epochs=LR_SCHEDULE[0][1],
-        boundaries=list(p[1] for p in LR_SCHEDULE[1:]),
-        multipliers=list(p[0] for p in LR_SCHEDULE),
+        warmup_epochs=common.LR_SCHEDULE[0][1],
+        boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
+        multipliers=list(p[0] for p in common.LR_SCHEDULE),
         compute_lr_on_cpu=True)

   with strategy_scope:
...
...
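Spelled out, the generator expressions above reduce to constants, given common.LR_SCHEDULE = [(1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)]:

    warmup_epochs = common.LR_SCHEDULE[0][1]                 # 5
    boundaries = list(p[1] for p in common.LR_SCHEDULE[1:])  # [30, 60, 80]
    multipliers = list(p[0] for p in common.LR_SCHEDULE)     # [1.0, 0.1, 0.01, 0.001]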
@@ -218,8 +184,11 @@ def run(flags_obj):
                 run_eagerly=flags_obj.run_eagerly)

   callbacks = common.get_callbacks(
-      learning_rate_schedule, imagenet_preprocessing.NUM_IMAGES['train'])
+      common.learning_rate_schedule, imagenet_preprocessing.NUM_IMAGES['train'])
+
+  if flags_obj.enable_checkpoint_and_export:
+    ckpt_full_path = os.path.join(flags_obj.model_dir, 'model.ckpt-{epoch:04d}')
+    callbacks.append(tf.keras.callbacks.ModelCheckpoint(
+        ckpt_full_path, save_weights_only=True))

   train_steps = (
       imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
   train_epochs = flags_obj.train_epochs
...
...
@@ -257,6 +226,10 @@ def run(flags_obj):
                       validation_data=validation_data,
                       validation_freq=flags_obj.epochs_between_evals,
                       verbose=2)
+  if flags_obj.enable_checkpoint_and_export:
+    # Keras model.save assumes a float32 input signature.
+    export_path = os.path.join(flags_obj.model_dir, 'saved_model')
+    model.save(export_path, include_optimizer=False)

   eval_output = None
   if not flags_obj.skip_eval:
...
...
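A note on the two enable_checkpoint_and_export additions: ModelCheckpoint expands the '{epoch:04d}' placeholder at save time (model.ckpt-0001, model.ckpt-0002, ...), and because the SavedModel is written with include_optimizer=False, reloading it yields an inference-ready model that must be re-compiled before further training. A hedged sketch of consuming the export (path is a placeholder):

    import os
    import tensorflow as tf

    model_dir = '/tmp/resnet50'  # placeholder for flags_obj.model_dir
    restored = tf.keras.models.load_model(os.path.join(model_dir, 'saved_model'))
    predictions = restored.predict(tf.zeros([1, 224, 224, 3]))  # inference only
    # Further training would first require restored.compile(...).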
official/vision/image_classification/resnet_imagenet_test.py (view file @ 9a88e415)
...
...
@@ -18,19 +18,16 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import tempfile
-
 import tensorflow as tf

 from tensorflow.python.eager import context
-from tensorflow.python.platform import googletest
 from official.utils.misc import keras_utils
 from official.utils.testing import integration
 from official.vision.image_classification import imagenet_preprocessing
 from official.vision.image_classification import resnet_imagenet_main


-class KerasImagenetTest(googletest.TestCase):
+class KerasImagenetTest(tf.test.TestCase):
   """Unit tests for Keras ResNet with ImageNet."""

   _extra_flags = [
...
...
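The switch from googletest.TestCase to tf.test.TestCase also explains the deletion in the next hunk: tf.test.TestCase already provides a managed get_temp_dir(), so the hand-rolled tempfile helper becomes redundant. A minimal sketch:

    import tensorflow as tf

    class ExampleTest(tf.test.TestCase):

      def test_uses_managed_temp_dir(self):
        path = self.get_temp_dir()  # created per test by the framework
        self.assertTrue(tf.io.gfile.exists(path))

    if __name__ == '__main__':
      tf.test.main()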
@@ -40,11 +37,6 @@ class KerasImagenetTest(googletest.TestCase):
   ]
   _tempdir = None

-  def get_temp_dir(self):
-    if not self._tempdir:
-      self._tempdir = tempfile.mkdtemp(dir=googletest.GetTempDir())
-    return self._tempdir
-
   @classmethod
   def setUpClass(cls):  # pylint: disable=invalid-name
     super(KerasImagenetTest, cls).setUpClass()
...
...
@@ -65,7 +57,6 @@ class KerasImagenetTest(googletest.TestCase):
     extra_flags = [
         "-distribution_strategy", "off",
         "-model_dir", "keras_imagenet_no_dist_strat",
         "-data_format", "channels_last",
     ]
-
     extra_flags = extra_flags + self._extra_flags
...
...
@@ -81,7 +72,6 @@ class KerasImagenetTest(googletest.TestCase):
     extra_flags = [
         "-enable_eager", "false",
         "-distribution_strategy", "off",
         "-model_dir", "keras_imagenet_graph_no_dist_strat",
         "-data_format", "channels_last",
     ]
-
     extra_flags = extra_flags + self._extra_flags
...
...
@@ -105,8 +95,8 @@ class KerasImagenetTest(googletest.TestCase):
     extra_flags = [
         "-num_gpus", "1",
         "-distribution_strategy", "default",
         "-model_dir", "keras_imagenet_1_gpu",
         "-data_format", "channels_last",
+        "-enable_checkpoint_and_export", "1",
     ]
-
     extra_flags = extra_flags + self._extra_flags
...
...
@@ -130,7 +120,6 @@ class KerasImagenetTest(googletest.TestCase):
         "-num_gpus", "1",
         "-dtype", "fp16",
         "-distribution_strategy", "default",
         "-model_dir", "keras_imagenet_1_gpu",
         "-data_format", "channels_last",
     ]
-
     extra_flags = extra_flags + self._extra_flags
...
...
@@ -141,27 +130,6 @@ class KerasImagenetTest(googletest.TestCase):
         extra_flags=extra_flags
     )

-  def test_end_to_end_graph_1_gpu(self):
-    """Test Keras model in legacy graph mode with 1 GPU."""
-    if context.num_gpus() < 1:
-      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
-          format(1, context.num_gpus()))
-
-    extra_flags = [
-        "-num_gpus", "1",
-        "-enable_eager", "false",
-        "-distribution_strategy", "default",
-        "-model_dir", "keras_imagenet_graph_1_gpu",
-        "-data_format", "channels_last",
-    ]
-    extra_flags = extra_flags + self._extra_flags
-
-    integration.run_synthetic(
-        main=resnet_imagenet_main.run,
-        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
-    )
-
   def test_end_to_end_2_gpu(self):
     """Test Keras model with 2 GPUs."""
...
...
@@ -176,7 +144,6 @@ class KerasImagenetTest(googletest.TestCase):
     extra_flags = [
         "-num_gpus", "2",
         "-distribution_strategy", "default",
         "-model_dir", "keras_imagenet_2_gpu",
     ]
-
     extra_flags = extra_flags + self._extra_flags
...
...
@@ -200,7 +167,6 @@ class KerasImagenetTest(googletest.TestCase):
         "-num_gpus", "2",
         "-enable_xla", "true",
         "-distribution_strategy", "default",
         "-model_dir", "keras_imagenet_xla_2_gpu",
     ]
-
     extra_flags = extra_flags + self._extra_flags
...
...
@@ -224,7 +190,6 @@ class KerasImagenetTest(googletest.TestCase):
         "-num_gpus", "2",
         "-dtype", "fp16",
         "-distribution_strategy", "default",
         "-model_dir", "keras_imagenet_2_gpu_fp16",
     ]
-
     extra_flags = extra_flags + self._extra_flags
...
...
@@ -249,50 +214,6 @@ class KerasImagenetTest(googletest.TestCase):
         "-dtype", "fp16",
         "-enable_xla", "true",
         "-distribution_strategy", "default",
         "-model_dir", "keras_imagenet_xla_2_gpu_fp16",
     ]
-
     extra_flags = extra_flags + self._extra_flags

     integration.run_synthetic(
         main=resnet_imagenet_main.run,
         tmp_root=self.get_temp_dir(),
         extra_flags=extra_flags
     )

-  def test_end_to_end_graph_2_gpu(self):
-    """Test Keras model in legacy graph mode with 2 GPUs."""
-    if context.num_gpus() < 2:
-      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
-          format(2, context.num_gpus()))
-
-    extra_flags = [
-        "-num_gpus", "2",
-        "-enable_eager", "false",
-        "-distribution_strategy", "default",
-        "-model_dir", "keras_imagenet_graph_2_gpu",
-    ]
-    extra_flags = extra_flags + self._extra_flags
-
-    integration.run_synthetic(
-        main=resnet_imagenet_main.run,
-        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
-    )
-
-  def test_end_to_end_graph_xla_2_gpu(self):
-    """Test Keras model in legacy graph mode with XLA and 2 GPUs."""
-    if context.num_gpus() < 2:
-      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
-          format(2, context.num_gpus()))
-
-    extra_flags = [
-        "-num_gpus", "2",
-        "-enable_eager", "false",
-        "-enable_xla", "true",
-        "-distribution_strategy", "default",
-        "-model_dir", "keras_imagenet_graph_xla_2_gpu",
-    ]
-    extra_flags = extra_flags + self._extra_flags
...
...
@@ -305,4 +226,4 @@ class KerasImagenetTest(googletest.TestCase):
 if __name__ == "__main__":
   tf.compat.v1.enable_v2_behavior()
-  googletest.main()
+  tf.test.main()