Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
eebea3f8
Commit
eebea3f8
authored
Mar 29, 2019
by
Haoyu Zhang
Browse files
Optimize data input pipeline
Co-authored-by:
Jiri Simsa
<
jsimsa@google.com
>
parent
8e7051a8
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
12 deletions
+16
-12
official/resnet/imagenet_main.py
official/resnet/imagenet_main.py
+7
-6
official/resnet/resnet_run_loop.py
official/resnet/resnet_run_loop.py
+9
-6
No files found.
official/resnet/imagenet_main.py
View file @
eebea3f8
...
...
@@ -200,12 +200,13 @@ def input_fn(is_training,
   dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

   # Convert to individual records.
-  # cycle_length = 10 means 10 files will be read and deserialized in parallel.
-  # This number is low enough to not cause too much contention on small systems
-  # but high enough to provide the benefits of parallelization. You may want
-  # to increase this number if you have a large number of CPU cores.
-  dataset = dataset.apply(tf.data.experimental.parallel_interleave(
-      tf.data.TFRecordDataset, cycle_length=10))
+  # cycle_length = 10 means that up to 10 files will be read and deserialized in
+  # parallel. You may want to increase this number if you have a large number of
+  # CPU cores.
+  dataset = dataset.interleave(
+      tf.data.TFRecordDataset, cycle_length=10,
+      num_parallel_calls=tf.data.experimental.AUTOTUNE)

   return resnet_run_loop.process_record_dataset(
       dataset=dataset,
...
...
official/resnet/resnet_run_loop.py
View file @
eebea3f8
...
...
@@ -83,6 +83,11 @@ def process_record_dataset(dataset,
     tf.compat.v1.logging.info('datasets_num_private_threads: %s',
                               datasets_num_private_threads)

+  # Disable intra-op parallelism to optimize for throughput instead of latency.
+  options = tf.data.Options()
+  options.experimental_threading.max_intra_op_parallelism = 1
+  dataset = dataset.with_options(options)
+
   # Prefetches a batch at a time to smooth out the time taken to load input
   # files for shuffling and processing.
   dataset = dataset.prefetch(buffer_size=batch_size)
...
...
@@ -94,12 +99,10 @@ def process_record_dataset(dataset,
   dataset = dataset.repeat(num_epochs)

   # Parses the raw records into images and labels.
-  dataset = dataset.apply(
-      tf.data.experimental.map_and_batch(
-          lambda value: parse_record_fn(value, is_training, dtype),
-          batch_size=batch_size,
-          num_parallel_batches=num_parallel_batches,
-          drop_remainder=False))
+  dataset = dataset.map(
+      lambda value: parse_record_fn(value, is_training, dtype),
+      num_parallel_calls=tf.data.experimental.AUTOTUNE)
+  dataset = dataset.batch(batch_size, drop_remainder=False)
# Operations between the final prefetch and the get_next call to the iterator
# will happen synchronously during run time. We prefetch here again to
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment