ModelZoo / ResNet50_tensorflow

Commit 0788a23c authored Dec 13, 2019 by Hongkun Yu, committed by A. Unique TensorFlower on Dec 13, 2019

Internal change

PiperOrigin-RevId: 285533511

parent 357f30f4
Showing 8 changed files with 51 additions and 62 deletions (+51 / -62)
official/r1/boosted_trees/train_higgs.py        +1  -2
official/r1/mnist/mnist_eager.py                +8  -9
official/r1/mnist/mnist_tpu.py                  +12 -11
official/r1/resnet/resnet_run_loop.py           +1  -2
official/r1/utils/tpu.py                        +4  -5
official/recommendation/neumf_model.py          +2  -3
official/transformer/transformer_main.py        +16 -23
official/utils/misc/distribution_utils.py       +7  -7
official/r1/boosted_trees/train_higgs.py

@@ -53,7 +53,6 @@ import tensorflow as tf
 # pylint: enable=g-bad-import-order

 from official.utils.flags import core as flags_core
-from tensorflow.contrib import estimator as contrib_estimator
 from official.utils.flags._conventions import help_wrap
 from official.utils.logs import logger

@@ -230,7 +229,7 @@ def train_boosted_trees(flags_obj):
   # Though BoostedTreesClassifier is under tf.estimator, faster in-memory
   # training is yet provided as a contrib library.
-  classifier = contrib_estimator.boosted_trees_classifier_train_in_memory(
+  classifier = tf.contrib.estimator.boosted_trees_classifier_train_in_memory(
       train_input_fn,
       feature_columns,
       model_dir=flags_obj.model_dir or None,
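Note: the two spellings touched in this file address the same contrib module, so behavior should be unchanged; a minimal sketch illustrating that assumption on a TensorFlow 1.x install (the assertion is illustrative, not from this repo):

import tensorflow as tf  # TensorFlow 1.x
from tensorflow.contrib import estimator as contrib_estimator

# Both names should resolve to the same in-memory boosted trees helper,
# because tf.contrib.estimator and the aliased import share one module object.
assert (contrib_estimator.boosted_trees_classifier_train_in_memory
        is tf.contrib.estimator.boosted_trees_classifier_train_in_memory)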
official/r1/mnist/mnist_eager.py

@@ -33,7 +33,6 @@ import time
 from absl import app as absl_app
 from absl import flags
 import tensorflow as tf
-from tensorflow.contrib import summary as contrib_summary
 from tensorflow.python import eager as tfe
 # pylint: enable=g-bad-import-order

@@ -62,7 +61,7 @@ def train(model, optimizer, dataset, step_counter, log_interval=None):
   start = time.time()
   for (batch, (images, labels)) in enumerate(dataset):
-    with contrib_summary.record_summaries_every_n_global_steps(
+    with tf.contrib.summary.record_summaries_every_n_global_steps(
         10, global_step=step_counter):
       # Record the operations used to compute the loss given the input,
       # so that the gradient of the loss with respect to the variables

@@ -70,8 +69,8 @@ def train(model, optimizer, dataset, step_counter, log_interval=None):
       with tf.GradientTape() as tape:
         logits = model(images, training=True)
         loss_value = loss(logits, labels)
-        contrib_summary.scalar('loss', loss_value)
-        contrib_summary.scalar('accuracy', compute_accuracy(logits, labels))
+        tf.contrib.summary.scalar('loss', loss_value)
+        tf.contrib.summary.scalar('accuracy', compute_accuracy(logits, labels))
       grads = tape.gradient(loss_value, model.variables)
       optimizer.apply_gradients(
           zip(grads, model.variables), global_step=step_counter)

@@ -94,9 +93,9 @@ def test(model, dataset):
                    tf.cast(labels, tf.int64))
   print('Test set: Average loss: %.4f, Accuracy: %4f%%\n' %
         (avg_loss.result(), 100 * accuracy.result()))
-  with contrib_summary.always_record_summaries():
-    contrib_summary.scalar('loss', avg_loss.result())
-    contrib_summary.scalar('accuracy', accuracy.result())
+  with tf.contrib.summary.always_record_summaries():
+    tf.contrib.summary.scalar('loss', avg_loss.result())
+    tf.contrib.summary.scalar('accuracy', accuracy.result())


 def run_mnist_eager(flags_obj):

@@ -138,9 +137,9 @@ def run_mnist_eager(flags_obj):
   else:
     train_dir = None
     test_dir = None
-  summary_writer = contrib_summary.create_file_writer(
+  summary_writer = tf.contrib.summary.create_file_writer(
       train_dir, flush_millis=10000)
-  test_summary_writer = contrib_summary.create_file_writer(
+  test_summary_writer = tf.contrib.summary.create_file_writer(
       test_dir, flush_millis=10000, name='test')
   # Create and restore checkpoint (if one exists on the path)
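Note: for readers unfamiliar with the TF 1.x contrib summary API used throughout this file, a minimal standalone sketch of the writer/recording-condition pattern (the log directory, interval, and scalar value are placeholders, not values from this repo):

import tensorflow as tf  # TensorFlow 1.x

tf.enable_eager_execution()
global_step = tf.train.get_or_create_global_step()

# Writer flushes to disk every 10 seconds; 'logs/train' is a placeholder path.
writer = tf.contrib.summary.create_file_writer('logs/train', flush_millis=10000)
with writer.as_default():
  # Only every 10th global step is actually recorded.
  with tf.contrib.summary.record_summaries_every_n_global_steps(
      10, global_step=global_step):
    tf.contrib.summary.scalar('loss', tf.constant(0.25))
    global_step.assign_add(1)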
official/r1/mnist/mnist_tpu.py

@@ -33,8 +33,6 @@ import tensorflow as tf
 # For open source environment, add grandparent directory for import
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(sys.path[0]))))

-from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
-from tensorflow.contrib import tpu as contrib_tpu
 from official.r1.mnist import dataset  # pylint: disable=wrong-import-position
 from official.r1.mnist import mnist  # pylint: disable=wrong-import-position

@@ -100,7 +98,7 @@ def model_fn(features, labels, mode, params):
         'class_ids': tf.argmax(logits, axis=1),
         'probabilities': tf.nn.softmax(logits),
     }
-    return contrib_tpu.TPUEstimatorSpec(mode, predictions=predictions)
+    return tf.contrib.tpu.TPUEstimatorSpec(mode, predictions=predictions)

   logits = model(image, training=(mode == tf.estimator.ModeKeys.TRAIN))
   loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

@@ -113,14 +111,14 @@ def model_fn(features, labels, mode, params):
         decay_rate=0.96)
     optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
     if FLAGS.use_tpu:
-      optimizer = contrib_tpu.CrossShardOptimizer(optimizer)
-    return contrib_tpu.TPUEstimatorSpec(
+      optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
+    return tf.contrib.tpu.TPUEstimatorSpec(
         mode=mode,
         loss=loss,
         train_op=optimizer.minimize(loss, tf.train.get_global_step()))

   if mode == tf.estimator.ModeKeys.EVAL:
-    return contrib_tpu.TPUEstimatorSpec(
+    return tf.contrib.tpu.TPUEstimatorSpec(
         mode=mode, loss=loss, eval_metrics=(metric_fn, [labels, logits]))

@@ -155,18 +153,21 @@ def main(argv):
   del argv  # Unused.
   tf.logging.set_verbosity(tf.logging.INFO)

-  tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
-      FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
+  tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
+      FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

-  run_config = contrib_tpu.RunConfig(
+  run_config = tf.contrib.tpu.RunConfig(
       cluster=tpu_cluster_resolver,
       model_dir=FLAGS.model_dir,
       session_config=tf.ConfigProto(
           allow_soft_placement=True, log_device_placement=True),
-      tpu_config=contrib_tpu.TPUConfig(FLAGS.iterations, FLAGS.num_shards),
+      tpu_config=tf.contrib.tpu.TPUConfig(FLAGS.iterations, FLAGS.num_shards),
   )

-  estimator = contrib_tpu.TPUEstimator(
+  estimator = tf.contrib.tpu.TPUEstimator(
       model_fn=model_fn,
       use_tpu=FLAGS.use_tpu,
       train_batch_size=FLAGS.batch_size,
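Note: a standalone sketch of the tf.contrib.tpu wiring shown above (the TPU name, model directory, batch size, and the toy model_fn are placeholder assumptions, not values from this file):

import tensorflow as tf  # TensorFlow 1.x

def model_fn(features, labels, mode, params):
  # Placeholder model: one dense layer over flattened features.
  logits = tf.layers.dense(tf.layers.flatten(features), 10)
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
  # CrossShardOptimizer averages gradients across TPU cores before applying them.
  optimizer = tf.contrib.tpu.CrossShardOptimizer(
      tf.train.GradientDescentOptimizer(0.01))
  train_op = optimizer.minimize(loss, tf.train.get_global_step())
  return tf.contrib.tpu.TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op)

resolver = tf.contrib.cluster_resolver.TPUClusterResolver('my-tpu')  # placeholder name
run_config = tf.contrib.tpu.RunConfig(
    cluster=resolver,
    model_dir='/tmp/mnist_tpu',  # placeholder directory
    tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=100, num_shards=8))
estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=model_fn, use_tpu=True, train_batch_size=1024, config=run_config)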
official/r1/resnet/resnet_run_loop.py

@@ -30,7 +30,6 @@ import os
 from absl import flags
 import tensorflow as tf
-from tensorflow.contrib import opt as contrib_opt
 from official.r1.resnet import imagenet_preprocessing
 from official.r1.resnet import resnet_model

@@ -446,7 +445,7 @@ def resnet_model_fn(features, labels, mode, model_class,
     tf.compat.v1.summary.scalar('learning_rate', learning_rate)

     if flags.FLAGS.enable_lars:
-      optimizer = contrib_opt.LARSOptimizer(
+      optimizer = tf.contrib.opt.LARSOptimizer(
          learning_rate,
          momentum=momentum,
          weight_decay=weight_decay,
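Note: for reference, a minimal construction of the contrib LARS optimizer referenced above; the hyperparameter values are placeholders rather than the ResNet defaults:

import tensorflow as tf  # TensorFlow 1.x

# Layer-wise Adaptive Rate Scaling, typically used for very large batch training.
optimizer = tf.contrib.opt.LARSOptimizer(
    learning_rate=0.1,     # placeholder
    momentum=0.9,          # placeholder
    weight_decay=0.0001)   # placeholder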
official/r1/utils/tpu.py

@@ -15,7 +15,6 @@
 """Functions specific to running TensorFlow on TPUs."""

 import tensorflow as tf
-from tensorflow.contrib import summary as contrib_summary

 # "local" is a magic word in the TPU cluster resolver; it informs the resolver

@@ -59,13 +58,13 @@ def construct_scalar_host_call(metric_dict, model_dir, prefix=""):
       List of summary ops to run on the CPU host.
     """
     step = global_step[0]
-    with contrib_summary.create_file_writer(
+    with tf.contrib.summary.create_file_writer(
         logdir=model_dir, filename_suffix=".host_call").as_default():
-      with contrib_summary.always_record_summaries():
+      with tf.contrib.summary.always_record_summaries():
         for i, name in enumerate(metric_names):
-          contrib_summary.scalar(prefix + name, args[i][0], step=step)
+          tf.contrib.summary.scalar(prefix + name, args[i][0], step=step)

-        return contrib_summary.all_summary_ops()
+        return tf.contrib.summary.all_summary_ops()

   # To log the current learning rate, and gradient norm for Tensorboard, the
   # summary op needs to be run on the host CPU via host_call. host_call
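Note: condensed from the hunk above, a sketch of the host_call pattern: on TPU, scalar summaries are written by a function that runs on the host CPU and returns the contrib summary ops (the logdir and metric name are placeholders):

import tensorflow as tf  # TensorFlow 1.x

def host_call_fn(global_step, learning_rate):
  # Runs on the host CPU; each argument arrives with an extra leading dimension,
  # hence the [0] indexing.
  step = global_step[0]
  with tf.contrib.summary.create_file_writer(
      logdir='/tmp/model_dir', filename_suffix='.host_call').as_default():  # placeholder logdir
    with tf.contrib.summary.always_record_summaries():
      tf.contrib.summary.scalar('training/learning_rate', learning_rate[0], step=step)
      return tf.contrib.summary.all_summary_ops()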
official/recommendation/neumf_model.py

@@ -37,7 +37,6 @@ import sys
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import tensorflow as tf
-from tensorflow.contrib import tpu as contrib_tpu
 from official.recommendation import constants as rconst
 from official.recommendation import movielens

@@ -117,7 +116,7 @@ def neumf_model_fn(features, labels, mode, params):
         epsilon=params["epsilon"])
     if params["use_tpu"]:
       # TODO(seemuch): remove this contrib import
-      optimizer = contrib_tpu.CrossShardOptimizer(optimizer)
+      optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

     mlperf_helper.ncf_print(key=mlperf_helper.TAGS.MODEL_HP_LOSS_FN,
                             value=mlperf_helper.TAGS.BCE)

@@ -275,7 +274,7 @@ def _get_estimator_spec_with_metrics(logits,  # type: tf.Tensor
                                      use_tpu_spec)

   if use_tpu_spec:
-    return contrib_tpu.TPUEstimatorSpec(
+    return tf.contrib.tpu.TPUEstimatorSpec(
         mode=tf.estimator.ModeKeys.EVAL,
         loss=cross_entropy,
         eval_metrics=(metric_fn, [in_top_k, ndcg, metric_weights]))
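Note: the eval_metrics argument above is a (metric_fn, tensor_list) pair that TPUEstimator evaluates on the host; a sketch of that contract with placeholder metric bodies (the real NCF metric_fn differs):

import tensorflow as tf  # TensorFlow 1.x

def metric_fn(in_top_k, ndcg, metric_weights):
  # Runs on the host CPU; must return a dict of (value, update_op) pairs.
  # Placeholder metric bodies for illustration only.
  return {
      'hit_rate': tf.metrics.mean(in_top_k, weights=metric_weights),
      'ndcg': tf.metrics.mean(ndcg, weights=metric_weights),
  }

# Inside the model_fn, the EVAL spec packages the function with its arguments:
#   return tf.contrib.tpu.TPUEstimatorSpec(
#       mode=tf.estimator.ModeKeys.EVAL,
#       loss=cross_entropy,
#       eval_metrics=(metric_fn, [in_top_k, ndcg, metric_weights]))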
official/transformer/transformer_main.py

@@ -33,9 +33,6 @@ import tensorflow as tf
 # pylint: enable=g-bad-import-order

 from official.r1.utils import export
-from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
-from tensorflow.contrib import opt as contrib_opt
-from tensorflow.contrib import tpu as contrib_tpu
 from official.r1.utils import tpu as tpu_util
 from official.transformer import compute_bleu
 from official.transformer import translate

@@ -118,10 +115,8 @@ def model_fn(features, labels, mode, params):
       metric_fn = lambda logits, labels: (
           metrics.get_eval_metrics(logits, labels, params=params))
       eval_metrics = (metric_fn, [logits, labels])
-      return contrib_tpu.TPUEstimatorSpec(
-          mode=mode, loss=loss, predictions={"predictions": logits},
+      return tf.contrib.tpu.TPUEstimatorSpec(
+          mode=mode, loss=loss, predictions={"predictions": logits},
           eval_metrics=eval_metrics)
     return tf.estimator.EstimatorSpec(
         mode=mode, loss=loss, predictions={"predictions": logits},

@@ -133,14 +128,12 @@ def model_fn(features, labels, mode, params):
       # in TensorBoard.
       metric_dict["minibatch_loss"] = loss
       if params["use_tpu"]:
-        return contrib_tpu.TPUEstimatorSpec(
+        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            host_call=tpu_util.construct_scalar_host_call(
                metric_dict=metric_dict, model_dir=params["model_dir"],
                prefix="training/"))
       record_scalars(metric_dict)
       return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

@@ -180,14 +173,14 @@ def get_train_op_and_metrics(loss, params):
     # Create optimizer. Use LazyAdamOptimizer from TF contrib, which is faster
     # than the TF core Adam optimizer.
-    optimizer = contrib_opt.LazyAdamOptimizer(
+    optimizer = tf.contrib.opt.LazyAdamOptimizer(
         learning_rate,
         beta1=params["optimizer_adam_beta1"],
         beta2=params["optimizer_adam_beta2"],
         epsilon=params["optimizer_adam_epsilon"])

     if params["use_tpu"] and params["tpu"] != tpu_util.LOCAL:
-      optimizer = contrib_tpu.CrossShardOptimizer(optimizer)
+      optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

     # Uses automatic mixed precision FP16 training if on GPU.
     if params["dtype"] == "fp16":

@@ -535,31 +528,31 @@ def construct_estimator(flags_obj, params, schedule_manager):
         model_fn=model_fn, model_dir=flags_obj.model_dir, params=params,
         config=tf.estimator.RunConfig(train_distribute=distribution_strategy))

-  tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
+  tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
       tpu=flags_obj.tpu, zone=flags_obj.tpu_zone,
       project=flags_obj.tpu_gcp_project)

-  tpu_config = contrib_tpu.TPUConfig(
+  tpu_config = tf.contrib.tpu.TPUConfig(
       iterations_per_loop=schedule_manager.single_iteration_train_steps,
       num_shards=flags_obj.num_tpu_shards)

-  run_config = contrib_tpu.RunConfig(
+  run_config = tf.contrib.tpu.RunConfig(
       cluster=tpu_cluster_resolver,
       model_dir=flags_obj.model_dir,
       session_config=tf.ConfigProto(
           allow_soft_placement=True, log_device_placement=True),
       tpu_config=tpu_config)

-  return contrib_tpu.TPUEstimator(
+  return tf.contrib.tpu.TPUEstimator(
       model_fn=model_fn,
       use_tpu=params["use_tpu"] and flags_obj.tpu != tpu_util.LOCAL,
       train_batch_size=schedule_manager.batch_size,
       eval_batch_size=schedule_manager.batch_size,
       params={
           # TPUEstimator needs to populate batch_size itself due to sharding.
           key: value for key, value in params.items() if key != "batch_size"},
       config=run_config)
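Note: a sketch of the optimizer construction shown in get_train_op_and_metrics, with placeholder hyperparameter values (the real ones come from the Transformer params object):

import tensorflow as tf  # TensorFlow 1.x

params = {  # placeholder values, not read from this repo
    'optimizer_adam_beta1': 0.9,
    'optimizer_adam_beta2': 0.997,
    'optimizer_adam_epsilon': 1e-9,
    'use_tpu': False,
}
learning_rate = 2.0  # placeholder; the real value comes from a warmup/decay schedule

# LazyAdam updates the Adam accumulators only for the embedding rows that
# actually receive gradients, which the comment above notes is faster than core Adam.
optimizer = tf.contrib.opt.LazyAdamOptimizer(
    learning_rate,
    beta1=params['optimizer_adam_beta1'],
    beta2=params['optimizer_adam_beta2'],
    epsilon=params['optimizer_adam_epsilon'])
if params['use_tpu']:
  optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)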
official/utils/misc/distribution_utils.py

@@ -23,7 +23,6 @@ import os
 import random
 import string

 import tensorflow as tf
-from tensorflow.contrib import distribute as contrib_distribute

 from official.utils.misc import tpu_lib

@@ -286,9 +285,10 @@ def set_up_synthetic_data():
       tf.distribute.experimental.MultiWorkerMirroredStrategy)
   # TODO(tobyboyd): Remove when contrib.distribute is all in core.
   if hasattr(tf, 'contrib'):
-    _monkey_patch_dataset_method(contrib_distribute.MirroredStrategy)
-    _monkey_patch_dataset_method(contrib_distribute.OneDeviceStrategy)
-    _monkey_patch_dataset_method(contrib_distribute.CollectiveAllReduceStrategy)
+    _monkey_patch_dataset_method(tf.contrib.distribute.MirroredStrategy)
+    _monkey_patch_dataset_method(tf.contrib.distribute.OneDeviceStrategy)
+    _monkey_patch_dataset_method(
+        tf.contrib.distribute.CollectiveAllReduceStrategy)
   else:
     print('Contrib missing: Skip monkey patch tf.contrib.distribute.*')

@@ -300,10 +300,10 @@ def undo_set_up_synthetic_data():
       tf.distribute.experimental.MultiWorkerMirroredStrategy)
   # TODO(tobyboyd): Remove when contrib.distribute is all in core.
   if hasattr(tf, 'contrib'):
-    _undo_monkey_patch_dataset_method(contrib_distribute.MirroredStrategy)
-    _undo_monkey_patch_dataset_method(contrib_distribute.OneDeviceStrategy)
-    _undo_monkey_patch_dataset_method(
-        contrib_distribute.CollectiveAllReduceStrategy)
+    _undo_monkey_patch_dataset_method(tf.contrib.distribute.MirroredStrategy)
+    _undo_monkey_patch_dataset_method(tf.contrib.distribute.OneDeviceStrategy)
+    _undo_monkey_patch_dataset_method(
+        tf.contrib.distribute.CollectiveAllReduceStrategy)
   else:
     print('Contrib missing: Skip remove monkey patch tf.contrib.distribute.*')
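Note: the hasattr guard above exists because tf.contrib is absent from TensorFlow 2.x builds; a minimal sketch of the same pattern with a stand-in patch helper (the helper below is hypothetical, only the hasattr check and strategy names mirror the file):

import tensorflow as tf

def _noop_patch(strategy_cls):
  # Stand-in for the real _monkey_patch_dataset_method helper.
  print('would patch %s' % strategy_cls.__name__)

# tf.contrib exists only in TensorFlow 1.x builds, so check before touching it.
if hasattr(tf, 'contrib'):
  _noop_patch(tf.contrib.distribute.MirroredStrategy)
  _noop_patch(tf.contrib.distribute.OneDeviceStrategy)
  _noop_patch(tf.contrib.distribute.CollectiveAllReduceStrategy)
else:
  print('Contrib missing: Skip monkey patch tf.contrib.distribute.*')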