Commit beeae099, authored Jul 10, 2017 by Toby Boyd, committed by GitHub on Jul 10, 2017

Merge pull request #1912 from tensorflow/cifar10_estimator

Replace in-memory DataSet with FixedLengthRecordDataset

Parents: 3fb07dc0, 71e8adc7
Showing 3 changed files with 106 additions and 87 deletions (+106 -87)

tutorials/image/cifar10_estimator/README.md        +17 -17
tutorials/image/cifar10_estimator/cifar10.py       +81 -35
tutorials/image/cifar10_estimator/cifar10_main.py   +8 -35
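Taken together, the three diffs below replace an input pipeline that unpickled the whole Python-format dataset into numpy arrays (and fed it through Dataset.from_tensor_slices) with one that streams fixed-size records straight from the binary-format files. A minimal sketch of the new pipeline shape, using the TF 1.2-era tf.contrib.data API this commit targets; the file list, shuffle buffer, and batch size here are placeholder values, not taken from the diff:

```python
import tensorflow as tf

# Placeholder file list; the real code derives it from --data_dir.
filenames = ['cifar-10-batches-bin/data_batch_1.bin']
record_bytes = 32 * 32 * 3 + 1  # 3072 image bytes + 1 label byte = 3073

def parse_record(value):
  # Split the single label byte from the 3072 CHW-ordered image bytes.
  raw = tf.decode_raw(value, tf.uint8)
  label = tf.cast(tf.strided_slice(raw, [0], [1]), tf.int32)
  image = tf.reshape(tf.strided_slice(raw, [1], [record_bytes]), [3, 32, 32])
  return tf.transpose(image, [1, 2, 0]), label  # CHW -> HWC

dataset = (tf.contrib.data.FixedLengthRecordDataset(filenames, record_bytes)
           .repeat()                    # loop over the data indefinitely
           .map(parse_record)
           .shuffle(buffer_size=20000)  # placeholder buffer size
           .batch(128))
image_batch, label_batch = dataset.make_one_shot_iterator().get_next()
```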
tutorials/image/cifar10_estimator/README.md

@@ -11,8 +11,8 @@ Code in this directory focuses on how to use TensorFlow Estimators to train and
 
 2. Download the CIFAR-10 dataset.
 
 ```shell
-curl -o cifar-10-python.tar.gz https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
-tar xzf cifar-10-python.tar.gz
+curl -o cifar-10-binary.tar.gz https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
+tar xzf cifar-10-binary.tar.gz
 ```
 
 <b>How to run:</b>

@@ -20,22 +20,22 @@ tar xzf cifar-10-python.tar.gz
 
 ```shell
 # After running the above commands, you should see the following in the folder
 # where the data is downloaded.
-$ ls -R cifar-10-batches-py
-cifar-10-batches-py:
-batches.meta data_batch_2 data_batch_4 readme.html
-data_batch_1 data_batch_3 data_batch_5 test_batch
+$ ls -R cifar-10-batches-bin
+cifar-10-batches-bin:
+batches.meta.txt data_batch_1.bin data_batch_2.bin data_batch_3.bin
+data_batch_4.bin data_batch_5.bin readme.html test_batch.bin
 
 # Run the model on CPU only. After training, it runs the evaluation.
-$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-py \
-                         --model_dir=/tmp/resnet_model \
+$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-bin \
+                         --model_dir=/tmp/cifar10 \
                          --is_cpu_ps=True \
                          --num_gpus=0 \
                          --train_steps=1000
 
 # Run the model on CPU and 2 GPUs. After training, it runs the evaluation.
-$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-py \
-                         --model_dir=/tmp/resnet_model \
+$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-bin \
+                         --model_dir=/tmp/cifar10 \
                          --is_cpu_ps=False \
                          --force_gpu_compatible=True \
                          --num_gpus=2 \
...
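The README switches from the Python-format tarball to the binary one because FixedLengthRecordDataset consumes fixed-size records: in the .bin files every record is exactly one label byte followed by 32 * 32 * 3 = 3072 image bytes in channel-major (CHW) order, 3073 bytes in total. A quick numpy sanity check of that layout; the path is a placeholder for wherever the tarball was extracted:

```python
import numpy as np

path = 'cifar-10-batches-bin/data_batch_1.bin'  # placeholder path
record_bytes = 1 + 32 * 32 * 3                  # label byte + image = 3073

raw = np.fromfile(path, dtype=np.uint8)
assert raw.size % record_bytes == 0             # 10000 records per batch file

first = raw[:record_bytes]
label = int(first[0])                                    # class id in [0, 9]
image = first[1:].reshape(3, 32, 32).transpose(1, 2, 0)  # CHW -> HWC
print(label, image.shape)                                # e.g. 6 (32, 32, 3)
```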
tutorials/image/cifar10_estimator/cifar10.py

@@ -27,8 +27,6 @@ import tensorflow as tf
 
 HEIGHT = 32
 WIDTH = 32
 DEPTH = 3
-NUM_CLASSES = 10
-
 
 class Cifar10DataSet(object):
   """Cifar10 data set.

@@ -36,40 +34,88 @@ class Cifar10DataSet(object):
   Described by http://www.cs.toronto.edu/~kriz/cifar.html.
   """
 
-  def __init__(self, data_dir):
+  def __init__(self, data_dir, subset='train', use_distortion=True):
     self.data_dir = data_dir
+    self.subset = subset
+    self.use_distortion = use_distortion
 
-  def read_all_data(self, subset='train'):
-    """Reads from data file and return images and labels in a numpy array."""
-    if subset == 'train':
-      filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i)
-                   for i in xrange(1, 5)]
-    elif subset == 'validation':
-      filenames = [os.path.join(self.data_dir, 'data_batch_5')]
-    elif subset == 'eval':
-      filenames = [os.path.join(self.data_dir, 'test_batch')]
+  def get_filenames(self):
+    if self.subset == 'train':
+      return [os.path.join(self.data_dir, 'data_batch_%d.bin' % i)
+              for i in xrange(1, 5)]
+    elif self.subset == 'validation':
+      return [os.path.join(self.data_dir, 'data_batch_5.bin')]
+    elif self.subset == 'eval':
+      return [os.path.join(self.data_dir, 'test_batch.bin')]
     else:
-      raise ValueError('Invalid data subset "%s"' % subset)
+      raise ValueError('Invalid data subset "%s"' % self.subset)
 
-    inputs = []
-    for filename in filenames:
-      with tf.gfile.Open(filename, 'r') as f:
-        inputs.append(cPickle.load(f))
-    all_images = np.concatenate(
-        [each_input['data'] for each_input in inputs]).astype(np.float32)
-    all_labels = np.concatenate(
-        [each_input['labels'] for each_input in inputs])
-    return all_images, all_labels
+  def make_batch(self, batch_size):
+    """Read the images and labels from 'filenames'."""
+    filenames = self.get_filenames()
+    record_bytes = (32 * 32 * 3) + 1
+
+    # Repeat infinitely.
+    dataset = tf.contrib.data.FixedLengthRecordDataset(
+        filenames, record_bytes).repeat()
+
+    # Parse records.
+    dataset = dataset.map(
+        self.parser, num_threads=batch_size,
+        output_buffer_size=2 * batch_size)
+
+    # Potentially shuffle records.
+    if self.subset == 'train':
+      min_queue_examples = int(
+          Cifar10DataSet.num_examples_per_epoch(self.subset) * 0.4)
+      # Ensure that the capacity is sufficiently large to provide good random
+      # shuffling.
+      dataset = dataset.shuffle(
+          buffer_size=min_queue_examples + 3 * batch_size)
+
+    # Batch it up.
+    dataset = dataset.batch(batch_size)
+    iterator = dataset.make_one_shot_iterator()
+    image_batch, label_batch = iterator.get_next()
+
+    return image_batch, label_batch
 
-  @staticmethod
-  def preprocess(image, is_training, distortion):
-    with tf.name_scope('preprocess'):
-      # Read image layout as flattened CHW.
-      image = tf.reshape(image, [DEPTH, HEIGHT, WIDTH])
-      # Convert to NHWC layout, compatible with TF image preprocessing APIs
-      image = tf.transpose(image, [1, 2, 0])
-      if is_training and distortion:
+  def parser(self, value):
+    """Parse a Cifar10 record from value.
+
+    Output images are in [height, width, depth] layout.
+    """
+    # Dimensions of the images in the CIFAR-10 dataset.
+    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
+    # input format.
+    label_bytes = 1
+    image_bytes = HEIGHT * WIDTH * DEPTH
+    # Every record consists of a label followed by the image, with a
+    # fixed number of bytes for each.
+    record_bytes = label_bytes + image_bytes
+
+    # Convert from a string to a vector of uint8 that is record_bytes long.
+    record_as_bytes = tf.decode_raw(value, tf.uint8)
+    # The first bytes represent the label, which we convert from
+    # uint8->int32.
+    label = tf.cast(
+        tf.strided_slice(record_as_bytes, [0], [label_bytes]), tf.int32)
+    label.set_shape([1])
+
+    # The remaining bytes after the label represent the image, which
+    # we reshape from [depth * height * width] to [depth, height, width].
+    depth_major = tf.reshape(
+        tf.strided_slice(record_as_bytes, [label_bytes], [record_bytes]),
+        [3, 32, 32])
+    # Convert from [depth, height, width] to [height, width, depth].
+    # This puts data in a compatible layout with TF image preprocessing APIs.
+    image = tf.transpose(depth_major, [1, 2, 0])
+
+    # Do custom preprocessing here.
+    image = self.preprocess(image)
+
+    return image, label
+
+  def preprocess(self, image):
+    """Preprocess a single image in [height, width, depth] layout."""
+    if self.subset == 'train' and self.use_distortion:
+      # Pad 4 pixels on each dimension of feature map, done in mini-batch
+      image = tf.image.resize_image_with_crop_or_pad(image, 40, 40)
+      image = tf.random_crop(image, [HEIGHT, WIDTH, DEPTH])
...
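A minimal sketch of driving the new Cifar10DataSet directly, outside the Estimator. It assumes the binary batches sit in a local cifar-10-batches-bin directory, that the script runs from the cifar10_estimator directory so cifar10 imports, and that num_examples_per_epoch (which make_batch calls) is defined elsewhere in cifar10.py, as the diff implies:

```python
import tensorflow as tf
import cifar10

# Placeholder data_dir; see the README diff above for the download steps.
dataset = cifar10.Cifar10DataSet(
    data_dir='cifar-10-batches-bin', subset='train', use_distortion=True)
image_batch, label_batch = dataset.make_batch(batch_size=128)

with tf.Session() as sess:
  images, labels = sess.run([image_batch, label_batch])
  print(images.shape, labels.shape)  # (128, 32, 32, 3) and (128, 1)
```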
tutorials/image/cifar10_estimator/cifar10_main.py

@@ -302,43 +302,16 @@ def input_fn(subset, num_shards):
   Returns:
     two lists of tensors for features and labels, each of num_shards length.
   """
-  dataset = cifar10.Cifar10DataSet(FLAGS.data_dir)
-  is_training = (subset == 'train')
-  if is_training:
+  if subset == 'train':
     batch_size = FLAGS.train_batch_size
-  else:
+  elif subset == 'validate' or subset == 'eval':
     batch_size = FLAGS.eval_batch_size
-  with tf.device('/cpu:0'), tf.name_scope('batching'):
-    # CPU loads all data from disk since there're only 60k 32*32 RGB images.
-    all_images, all_labels = dataset.read_all_data(subset)
-    dataset = tf.contrib.data.Dataset.from_tensor_slices(
-        (all_images, all_labels))
-    dataset = dataset.map(
-        lambda x, y: (tf.cast(x, tf.float32), tf.cast(y, tf.int32)),
-        num_threads=2, output_buffer_size=batch_size)
-
-    # Image preprocessing.
-    def _preprocess(image, label):
-      # If GPU is available, NHWC to NCHW transpose is done in ResNetCifar10
-      # class, not included in preprocessing.
-      return cifar10.Cifar10DataSet.preprocess(
-          image, is_training, FLAGS.use_distortion_for_training), label
-    dataset = dataset.map(
-        _preprocess, num_threads=batch_size, output_buffer_size=2 * batch_size)
-
-    # Repeat infinitely.
-    dataset = dataset.repeat()
-
-    if is_training:
-      min_fraction_of_examples_in_queue = 0.4
-      min_queue_examples = int(
-          cifar10.Cifar10DataSet.num_examples_per_epoch(subset) *
-          min_fraction_of_examples_in_queue)
-      # Ensure that the capacity is sufficiently large to provide good random
-      # shuffling
-      dataset = dataset.shuffle(buffer_size=min_queue_examples + 3 * batch_size)
-
-    dataset = dataset.batch(batch_size)
-    iterator = dataset.make_one_shot_iterator()
-    image_batch, label_batch = iterator.get_next()
+  else:
+    raise ValueError('Subset must be one of \'train\', \'validate\' and \'eval\'')
+  with tf.device('/cpu:0'):
+    use_distortion = subset == 'train' and FLAGS.use_distortion_for_training
+    dataset = cifar10.Cifar10DataSet(FLAGS.data_dir, subset, use_distortion)
+    image_batch, label_batch = dataset.make_batch(batch_size)
 
   if num_shards <= 1:
     # No GPU available or only 1 GPU.
     return [image_batch], [label_batch]
...
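The num_shards > 1 branch of input_fn falls outside this hunk, so the diff does not show how the batch becomes per-GPU shards. Purely as an illustration of the sharding idea implied by the return type (lists of num_shards tensors), one common approach splits the batch along the example axis; this is a hypothetical sketch, not necessarily what cifar10_main.py does:

```python
import tensorflow as tf

def split_into_shards(image_batch, label_batch, num_shards):
  # Illustrative only: cut a [N, H, W, C] batch into num_shards equal
  # slices along axis 0, one slice per GPU tower. Requires N to be
  # divisible by num_shards.
  image_shards = tf.split(image_batch, num_shards)
  label_shards = tf.split(label_batch, num_shards)
  return image_shards, label_shards
```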