Commit 7f99c1c3 authored by huchen

Merge branch 'dtk21.10.1_v1' into 'main'

Update some TF files

See merge request dcutoolkit/deeplearing/dlexamples_new!5
parents 6b6f8b0c cf66c525
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility code for the default platform."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import tempfile
import cnn_util
from models import model_config
_ROOT_PROJECT_DIR = os.path.dirname(cnn_util.__file__)
def define_platform_params():
"""Defines platform-specific parameters.
Currently there are no platform-specific parameters to be defined.
"""
pass
def get_cluster_manager(params, config_proto):
"""Returns the cluster manager to be used."""
return cnn_util.GrpcClusterManager(params, config_proto)
def get_command_to_run_python_module(module):
"""Returns a command to run a Python module."""
  python_interpreter = sys.executable
  if not python_interpreter:
    raise ValueError('Could not find Python interpreter')
  return [python_interpreter,
          os.path.join(_ROOT_PROJECT_DIR, module + '.py')]
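# Usage sketch (illustration only, not part of the upstream file): the
# returned list can be passed directly to the subprocess module. The module
# name 'benchmark_cnn' below is an assumed example, not required by this API.
def _example_get_command_to_run_python_module():
  cmd = get_command_to_run_python_module('benchmark_cnn')
  # cmd is [sys.executable, os.path.join(_ROOT_PROJECT_DIR, 'benchmark_cnn.py')]
  return cmd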
def get_test_output_dir():
"""Returns a directory where test outputs should be placed."""
base_dir = os.environ.get('TEST_OUTPUTS_DIR',
'/tmp/tf_cnn_benchmarks_test_outputs')
if not os.path.exists(base_dir):
os.mkdir(base_dir)
return tempfile.mkdtemp(dir=base_dir)
def get_test_data_dir():
"""Returns the path to the test_data directory."""
return os.path.join(_ROOT_PROJECT_DIR, 'test_data')
def get_ssd_backborn_model_file():
raise NotImplementedError
def get_ssd_backboard_data_dir():
raise NotImplementedError
def _initialize(params, config_proto):
del params, config_proto
model_config.register_tf1_models()
_is_initialized = False
def initialize(params, config_proto):
  global _is_initialized
  if _is_initialized:
    return
  _is_initialized = True
  _initialize(params, config_proto)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility code for a certain platform.
This file simply imports everything from the default platform. To switch to a
different platform, the import statement can be changed to point to a new
platform.
Creating a custom platform can be useful to, e.g., run some initialization code
required by the platform or register a platform-specific model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from platforms.default.util import * # pylint: disable=unused-import,wildcard-import
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Image pre-processing utilities.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
# pylint: disable=g-direct-tensorflow-import
import cnn_util
from tensorflow.python.data.ops import multi_device_iterator_ops
from tensorflow.python.framework import function
from tensorflow.python.layers import utils
from tensorflow.python.ops import data_flow_ops
from tensorflow.python.platform import gfile
import mlperf
def parse_example_proto(example_serialized):
"""Parses an Example proto containing a training example of an image.
The output of the build_image_data.py image preprocessing script is a dataset
containing serialized Example protocol buffers. Each Example proto contains
the following fields:
image/height: 462
image/width: 581
image/colorspace: 'RGB'
image/channels: 3
image/class/label: 615
image/class/synset: 'n03623198'
image/class/text: 'knee pad'
image/object/bbox/xmin: 0.1
image/object/bbox/xmax: 0.9
image/object/bbox/ymin: 0.2
image/object/bbox/ymax: 0.6
image/object/bbox/label: 615
image/format: 'JPEG'
image/filename: 'ILSVRC2012_val_00041207.JPEG'
image/encoded: <JPEG encoded string>
Args:
example_serialized: scalar Tensor tf.string containing a serialized
Example protocol buffer.
Returns:
image_buffer: Tensor tf.string containing the contents of a JPEG file.
label: Tensor tf.int32 containing the label.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
text: Tensor tf.string containing the human-readable label.
"""
# Dense features in Example proto.
feature_map = {
'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
default_value=-1),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
}
sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
# Sparse features in Example proto.
feature_map.update(
{k: sparse_float32 for k in ['image/object/bbox/xmin',
'image/object/bbox/ymin',
'image/object/bbox/xmax',
'image/object/bbox/ymax']})
features = tf.parse_single_example(example_serialized, feature_map)
label = tf.cast(features['image/class/label'], dtype=tf.int32)
xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
  # Note that the ordering is (y, x), to match the [ymin, xmin, ymax, xmax]
  # convention used by TensorFlow's image ops.
bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
# Force the variable number of bounding boxes into the shape
# [1, num_boxes, coords].
bbox = tf.expand_dims(bbox, 0)
bbox = tf.transpose(bbox, [0, 2, 1])
return features['image/encoded'], label, bbox, features['image/class/text']
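# Usage sketch (illustration only, not part of the upstream file): builds a
# minimal serialized Example with the fields documented above and runs it
# through parse_example_proto. All field values below are made up.
def _example_parse_example_proto():
  example = tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[b'fake-jpeg-bytes'])),
      'image/class/label': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[615])),
      'image/class/text': tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[b'knee pad'])),
      'image/object/bbox/xmin': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.1])),
      'image/object/bbox/ymin': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.2])),
      'image/object/bbox/xmax': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.9])),
      'image/object/bbox/ymax': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.6])),
  }))
  serialized = tf.constant(example.SerializeToString())
  image_buffer, label, bbox, text = parse_example_proto(serialized)
  # bbox has shape [1, num_boxes, 4] with coordinates [ymin, xmin, ymax, xmax].
  return image_buffer, label, bbox, text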
_RESIZE_METHOD_MAP = {
'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR,
'bilinear': tf.image.ResizeMethod.BILINEAR,
'bicubic': tf.image.ResizeMethod.BICUBIC,
'area': tf.image.ResizeMethod.AREA
}
def get_image_resize_method(resize_method, batch_position=0):
"""Get tensorflow resize method.
If resize_method is 'round_robin', return different methods based on batch
position in a round-robin fashion. NOTE: If the batch size is not a multiple
of the number of methods, then the distribution of methods will not be
uniform.
Args:
resize_method: (string) nearest, bilinear, bicubic, area, or round_robin.
batch_position: position of the image in a batch. NOTE: this argument can
be an integer or a tensor
Returns:
    one of the resize methods defined in tf.image.ResizeMethod.
"""
if resize_method != 'round_robin':
return _RESIZE_METHOD_MAP[resize_method]
# return a resize method based on batch position in a round-robin fashion.
resize_methods = list(_RESIZE_METHOD_MAP.values())
def lookup(index):
return resize_methods[index]
def resize_method_0():
return utils.smart_cond(batch_position % len(resize_methods) == 0,
lambda: lookup(0), resize_method_1)
def resize_method_1():
return utils.smart_cond(batch_position % len(resize_methods) == 1,
lambda: lookup(1), resize_method_2)
def resize_method_2():
return utils.smart_cond(batch_position % len(resize_methods) == 2,
lambda: lookup(2), lambda: lookup(3))
# NOTE(jsimsa): Unfortunately, we cannot use a single recursive function here
# because TF would not be able to construct a finite graph.
return resize_method_0()
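# Usage sketch (illustration only, not part of the upstream file): with an
# integer batch_position, 'round_robin' cycles through the methods in
# _RESIZE_METHOD_MAP insertion order (guaranteed in Python 3.7+ dicts), so
# positions 0..3 yield nearest, bilinear, bicubic, area, and position 4
# wraps back to nearest.
def _example_round_robin_resize_method():
  return [get_image_resize_method('round_robin', batch_position=i)
          for i in range(5)]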
def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32):
"""Decode a JPEG string into one 3-D float image Tensor.
Args:
image_buffer: scalar string Tensor.
scope: Optional scope for op_scope.
Returns:
3-D float Tensor with values ranging from [0, 1).
"""
# with tf.op_scope([image_buffer], scope, 'decode_jpeg'):
# with tf.name_scope(scope, 'decode_jpeg', [image_buffer]):
with tf.name_scope(scope or 'decode_jpeg'):
# Decode the string as an RGB JPEG.
# Note that the resulting image contains an unknown height and width
# that is set dynamically by decode_jpeg. In other words, the height
# and width of image is unknown at compile-time.
image = tf.image.decode_jpeg(image_buffer, channels=3,
fancy_upscaling=False,
dct_method='INTEGER_FAST')
# image = tf.Print(image, [tf.shape(image)], 'Image shape: ')
return image
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
_CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN]
def normalized_image(images):
# Rescale from [0, 255] to [0, 2]
images = tf.multiply(images, 1. / 127.5)
# Rescale to [-1, 1]
mlperf.logger.log(key=mlperf.tags.INPUT_MEAN_SUBTRACTION, value=[1.0] * 3)
return tf.subtract(images, 1.0)
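# Worked example (illustration): a pixel value of 0 maps to 0 / 127.5 - 1 = -1,
# 127.5 maps to 0, and 255 maps to 255 / 127.5 - 1 = 1, giving the [-1, 1]
# output range described above.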
def eval_image(image,
height,
width,
batch_position,
resize_method,
summary_verbosity=0):
"""Get the image for model evaluation.
  We preprocess the image similarly to Slim; see
https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/vgg_preprocessing.py
Validation images do not have bounding boxes, so to crop the image, we first
resize the image such that the aspect ratio is maintained and the resized
height and width are both at least 1.145 times `height` and `width`
respectively. Then, we do a central crop to size (`height`, `width`).
Args:
image: 3-D float Tensor representing the image.
height: The height of the image that will be returned.
width: The width of the image that will be returned.
batch_position: position of the image in a batch, which affects how images
are distorted and resized. NOTE: this argument can be an integer or a
tensor
resize_method: one of the strings 'round_robin', 'nearest', 'bilinear',
'bicubic', or 'area'.
summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both
summaries and checkpoints.
Returns:
An image of size (output_height, output_width, 3) that is resized and
cropped as described above.
"""
# TODO(reedwm): Currently we resize then crop. Investigate if it's faster to
# crop then resize.
with tf.name_scope('eval_image'):
if summary_verbosity >= 3:
tf.summary.image(
'original_image', tf.expand_dims(image, 0))
shape = tf.shape(image)
image_height = shape[0]
image_width = shape[1]
image_height_float = tf.cast(image_height, tf.float32)
image_width_float = tf.cast(image_width, tf.float32)
# This value is chosen so that in resnet, images are cropped to a size of
# 256 x 256, which matches what other implementations do. The final image
# size for resnet is 224 x 224, and floor(224 * 1.145) = 256.
scale_factor = 1.145
# Compute resize_height and resize_width to be the minimum values such that
# 1. The aspect ratio is maintained (i.e. resize_height / resize_width is
# image_height / image_width), and
# 2. resize_height >= height * `scale_factor`, and
# 3. resize_width >= width * `scale_factor`
max_ratio = tf.maximum(height / image_height_float,
width / image_width_float)
resize_height = tf.cast(image_height_float * max_ratio * scale_factor,
tf.int32)
resize_width = tf.cast(image_width_float * max_ratio * scale_factor,
tf.int32)
mlperf.logger.log_input_resize_aspect_preserving(height, width,
scale_factor)
# Resize the image to shape (`resize_height`, `resize_width`)
image_resize_method = get_image_resize_method(resize_method, batch_position)
distorted_image = tf.image.resize_images(image,
[resize_height, resize_width],
image_resize_method,
align_corners=False)
# Do a central crop of the image to size (height, width).
# MLPerf requires us to log (height, width) with two different keys.
mlperf.logger.log(key=mlperf.tags.INPUT_CENTRAL_CROP, value=[height, width])
mlperf.logger.log(key=mlperf.tags.INPUT_RESIZE, value=[height, width])
total_crop_height = (resize_height - height)
crop_top = total_crop_height // 2
total_crop_width = (resize_width - width)
crop_left = total_crop_width // 2
distorted_image = tf.slice(distorted_image, [crop_top, crop_left, 0],
[height, width, 3])
distorted_image.set_shape([height, width, 3])
if summary_verbosity >= 3:
tf.summary.image(
'cropped_resized_image', tf.expand_dims(distorted_image, 0))
image = distorted_image
return image
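# Worked example (illustration only): for a 480x640 source image and
# height = width = 224, max_ratio = max(224/480, 224/640) = 0.4667, so
# resize_height = int(480 * 0.4667 * 1.145) = 256 and
# resize_width = int(640 * 0.4667 * 1.145) = 341. The central crop then
# removes (256 - 224) // 2 = 16 rows from the top and (341 - 224) // 2 = 58
# columns from the left before taking the 224x224 slice.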
def train_image(image_buffer,
height,
width,
bbox,
batch_position,
resize_method,
distortions,
scope=None,
summary_verbosity=0,
distort_color_in_yiq=False,
fuse_decode_and_crop=False):
"""Distort one image for training a network.
Distorting images provides a useful technique for augmenting the data
set during training in order to make the network invariant to aspects
  of the image that do not affect the label.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
height: integer
width: integer
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged
as [ymin, xmin, ymax, xmax].
batch_position: position of the image in a batch, which affects how images
are distorted and resized. NOTE: this argument can be an integer or a
tensor
resize_method: round_robin, nearest, bilinear, bicubic, or area.
distortions: If true, apply full distortions for image colors.
scope: Optional scope for op_scope.
summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both
summaries and checkpoints.
distort_color_in_yiq: distort color of input images in YIQ space.
fuse_decode_and_crop: fuse the decode/crop operation.
Returns:
3-D float Tensor of distorted image used for training.
"""
# with tf.op_scope([image, height, width, bbox], scope, 'distort_image'):
# with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
with tf.name_scope(scope or 'distort_image'):
# A large fraction of image datasets contain a human-annotated bounding box
# delineating the region of the image containing the object of interest. We
# choose to create a new bounding box for the object which is a randomly
# distorted version of the human-annotated bounding box that obeys an
# allowed range of aspect ratios, sizes and overlap with the human-annotated
# bounding box. If no box is supplied, then we assume the bounding box is
# the entire image.
min_object_covered = 0.1
aspect_ratio_range = [0.75, 1.33]
area_range = [0.05, 1.0]
max_attempts = 100
mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV,
value=min_object_covered)
mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE,
value=aspect_ratio_range)
mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE,
value=area_range)
mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS,
value=max_attempts)
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
tf.image.extract_jpeg_shape(image_buffer),
bounding_boxes=bbox,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=max_attempts,
use_image_if_no_bounding_boxes=True)
bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
if summary_verbosity >= 3:
image = tf.image.decode_jpeg(image_buffer, channels=3,
dct_method='INTEGER_FAST')
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
image_with_distorted_box = tf.image.draw_bounding_boxes(
tf.expand_dims(image, 0), distort_bbox)
tf.summary.image(
'images_with_distorted_bounding_box',
image_with_distorted_box)
# Crop the image to the specified bounding box.
if fuse_decode_and_crop:
offset_y, offset_x, _ = tf.unstack(bbox_begin)
target_height, target_width, _ = tf.unstack(bbox_size)
crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
image = tf.image.decode_and_crop_jpeg(
image_buffer, crop_window, channels=3)
else:
image = tf.image.decode_jpeg(image_buffer, channels=3,
dct_method='INTEGER_FAST')
image = tf.slice(image, bbox_begin, bbox_size)
mlperf.logger.log(key=mlperf.tags.INPUT_RANDOM_FLIP)
distorted_image = tf.image.random_flip_left_right(image)
# This resizing operation may distort the images because the aspect
# ratio is not respected.
mlperf.logger.log(key=mlperf.tags.INPUT_RESIZE, value=[height, width])
image_resize_method = get_image_resize_method(resize_method, batch_position)
distorted_image = tf.image.resize_images(
distorted_image, [height, width],
image_resize_method,
align_corners=False)
# Restore the shape since the dynamic slice based upon the bbox_size loses
# the third dimension.
distorted_image.set_shape([height, width, 3])
if summary_verbosity >= 3:
tf.summary.image('cropped_resized_maybe_flipped_image',
tf.expand_dims(distorted_image, 0))
if distortions:
distorted_image = tf.cast(distorted_image, dtype=tf.float32)
      # Image values are expected to be in [0, 1] for color distortion.
distorted_image /= 255.
# Randomly distort the colors.
distorted_image = distort_color(distorted_image, batch_position,
distort_color_in_yiq=distort_color_in_yiq)
# Note: This ensures the scaling matches the output of eval_image
distorted_image *= 255
if summary_verbosity >= 3:
tf.summary.image(
'final_distorted_image',
tf.expand_dims(distorted_image, 0))
return distorted_image
def distort_color(image, batch_position=0, distort_color_in_yiq=False,
scope=None):
"""Distort the color of the image.
Each color distortion is non-commutative and thus ordering of the color ops
matters. Ideally we would randomly permute the ordering of the color ops.
  Rather than adding that level of complication, we select a distinct ordering
of color ops based on the position of the image in a batch.
Args:
image: float32 Tensor containing single image. Tensor values should be in
range [0, 1].
batch_position: the position of the image in a batch. NOTE: this argument
can be an integer or a tensor
distort_color_in_yiq: distort color of input images in YIQ space.
scope: Optional scope for op_scope.
Returns:
color-distorted image
"""
if distort_color_in_yiq:
try:
from tensorflow.contrib.image.python.ops import distort_image_ops # pylint: disable=g-import-not-at-top
except ImportError:
raise ValueError(
'In TF2, you cannot pass --distortions unless you also pass '
'--nodistort_color_in_yiq. This is because the random_hsv_in_yiq was '
'removed in TF2. --distortions does not improve accuracy on resnet '
'so it is not recommended. --nodistort_color_in_yiq also has no '
'impact on accuracy, but may hurt performance.')
with tf.name_scope(scope or 'distort_color'):
def distort_fn_0(image=image):
"""Variant 0 of distort function."""
image = tf.image.random_brightness(image, max_delta=32. / 255.)
if distort_color_in_yiq:
image = distort_image_ops.random_hsv_in_yiq(
image, lower_saturation=0.5, upper_saturation=1.5,
max_delta_hue=0.2 * math.pi)
else:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
return image
def distort_fn_1(image=image):
"""Variant 1 of distort function."""
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
if distort_color_in_yiq:
image = distort_image_ops.random_hsv_in_yiq(
image, lower_saturation=0.5, upper_saturation=1.5,
max_delta_hue=0.2 * math.pi)
else:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
return image
image = utils.smart_cond(batch_position % 2 == 0, distort_fn_0,
distort_fn_1)
# The random_* ops do not necessarily clamp.
image = tf.clip_by_value(image, 0.0, 1.0)
return image
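# Usage sketch (illustration only, not part of the upstream file):
# distort_color expects float32 images in [0, 1]. Even batch positions take
# distort_fn_0's op ordering and odd positions take distort_fn_1's, as the
# docstring describes. The input here is a random image, for illustration.
def _example_distort_color():
  image = tf.random_uniform([224, 224, 3], minval=0., maxval=1.)
  even = distort_color(image, batch_position=0)
  odd = distort_color(image, batch_position=1)
  return even, odd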
class InputPreprocessor(object):
"""Base class for all model preprocessors."""
def __init__(self, batch_size, output_shapes):
self.batch_size = batch_size
self.output_shapes = output_shapes
def supports_datasets(self):
"""Whether this preprocessor supports dataset."""
return False
def minibatch(self, dataset, subset, params, shift_ratio=-1):
"""Returns tensors representing a minibatch of all the input."""
raise NotImplementedError('Must be implemented by subclass.')
# The methods added below are only supported/used if supports_datasets()
# returns True.
# TODO(laigd): refactor benchmark_cnn.py and put the logic of
# _build_input_processing() into InputPreprocessor.
def parse_and_preprocess(self, value, batch_position):
"""Function to parse and preprocess an Example proto in input pipeline."""
raise NotImplementedError('Must be implemented by subclass.')
# TODO(laigd): figure out how to remove these parameters, since the
# preprocessor itself has self.batch_size, self.num_splits, etc defined.
def build_multi_device_iterator(self, batch_size, num_splits, cpu_device,
params, gpu_devices, dataset, doing_eval):
"""Creates a MultiDeviceIterator."""
assert self.supports_datasets()
assert num_splits == len(gpu_devices)
with tf.name_scope('batch_processing'):
if doing_eval:
subset = 'validation'
else:
subset = 'train'
batch_size_per_split = batch_size // num_splits
ds = self.create_dataset(
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train=(not doing_eval),
datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
num_threads=params.datasets_num_private_threads,
datasets_use_caching=params.datasets_use_caching,
datasets_parallel_interleave_cycle_length=(
params.datasets_parallel_interleave_cycle_length),
datasets_sloppy_parallel_interleave=(
params.datasets_sloppy_parallel_interleave),
datasets_parallel_interleave_prefetch=(
params.datasets_parallel_interleave_prefetch))
multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
ds,
gpu_devices,
source_device=cpu_device,
max_buffer_size=params.multi_device_iterator_max_buffer_size)
tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS,
multi_device_iterator.initializer)
return multi_device_iterator
def create_dataset(self,
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train,
datasets_repeat_cached_sample,
num_threads=None,
datasets_use_caching=False,
datasets_parallel_interleave_cycle_length=None,
datasets_sloppy_parallel_interleave=False,
datasets_parallel_interleave_prefetch=None):
"""Creates a dataset for the benchmark."""
raise NotImplementedError('Must be implemented by subclass.')
def create_iterator(self, ds):
ds_iterator = tf.data.make_initializable_iterator(ds)
tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS,
ds_iterator.initializer)
return ds_iterator
def minibatch_fn(self, batch_size, model_input_shapes, num_splits,
dataset, subset, train, datasets_repeat_cached_sample,
num_threads, datasets_use_caching,
datasets_parallel_interleave_cycle_length,
datasets_sloppy_parallel_interleave,
datasets_parallel_interleave_prefetch):
"""Returns a function and list of args for the fn to create a minibatch."""
assert self.supports_datasets()
batch_size_per_split = batch_size // num_splits
assert batch_size_per_split == model_input_shapes[0][0]
with tf.name_scope('batch_processing'):
ds = self.create_dataset(batch_size, num_splits, batch_size_per_split,
dataset, subset, train,
datasets_repeat_cached_sample, num_threads,
datasets_use_caching,
datasets_parallel_interleave_cycle_length,
datasets_sloppy_parallel_interleave,
datasets_parallel_interleave_prefetch)
ds_iterator = self.create_iterator(ds)
ds_iterator_string_handle = ds_iterator.string_handle()
@function.Defun(tf.string)
def _fn(h):
remote_iterator = tf.data.Iterator.from_string_handle(
h, ds_iterator.output_types, ds_iterator.output_shapes)
input_list = remote_iterator.get_next()
reshaped_input_list = [
tf.reshape(input_list[i], shape=model_input_shapes[i])
for i in range(len(input_list))
]
return reshaped_input_list
return _fn, [ds_iterator_string_handle]
class BaseImagePreprocessor(InputPreprocessor):
"""Base class for all image model preprocessors."""
def __init__(self,
batch_size,
output_shapes,
num_splits,
dtype,
train,
distortions,
resize_method,
shift_ratio=-1,
summary_verbosity=0,
distort_color_in_yiq=True,
fuse_decode_and_crop=True,
match_mlperf=False):
super(BaseImagePreprocessor, self).__init__(batch_size, output_shapes)
image_shape = output_shapes[0]
# image_shape is in form (batch_size, height, width, depth)
self.height = image_shape[1]
self.width = image_shape[2]
self.depth = image_shape[3]
self.num_splits = num_splits
self.dtype = dtype
self.train = train
self.resize_method = resize_method
self.shift_ratio = shift_ratio
self.distortions = distortions
self.distort_color_in_yiq = distort_color_in_yiq
self.fuse_decode_and_crop = fuse_decode_and_crop
if self.batch_size % self.num_splits != 0:
raise ValueError(
('batch_size must be a multiple of num_splits: '
'batch_size %d, num_splits: %d') %
(self.batch_size, self.num_splits))
self.batch_size_per_split = self.batch_size // self.num_splits
self.summary_verbosity = summary_verbosity
self.match_mlperf = match_mlperf
def parse_and_preprocess(self, value, batch_position):
assert self.supports_datasets()
image_buffer, label_index, bbox, _ = parse_example_proto(value)
if self.match_mlperf:
bbox = tf.zeros((1, 0, 4), dtype=bbox.dtype)
mlperf.logger.log(key=mlperf.tags.INPUT_CROP_USES_BBOXES, value=False)
else:
mlperf.logger.log(key=mlperf.tags.INPUT_CROP_USES_BBOXES, value=True)
image = self.preprocess(image_buffer, bbox, batch_position)
return (image, label_index)
def preprocess(self, image_buffer, bbox, batch_position):
raise NotImplementedError('Must be implemented by subclass.')
def create_dataset(self,
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train,
datasets_repeat_cached_sample,
num_threads=None,
datasets_use_caching=False,
datasets_parallel_interleave_cycle_length=None,
datasets_sloppy_parallel_interleave=False,
datasets_parallel_interleave_prefetch=None):
"""Creates a dataset for the benchmark."""
assert self.supports_datasets()
glob_pattern = dataset.tf_record_pattern(subset)
file_names = gfile.Glob(glob_pattern)
if not file_names:
raise ValueError('Found no files in --data_dir matching: {}'
.format(glob_pattern))
ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=train)
ds = ds.apply(
tf.data.experimental.parallel_interleave(
tf.data.TFRecordDataset,
cycle_length=datasets_parallel_interleave_cycle_length or 10,
sloppy=datasets_sloppy_parallel_interleave,
prefetch_input_elements=datasets_parallel_interleave_prefetch))
if datasets_repeat_cached_sample:
# Repeat a single sample element indefinitely to emulate memory-speed IO.
ds = ds.take(1).cache().repeat()
counter = tf.data.Dataset.range(batch_size)
counter = counter.repeat()
ds = tf.data.Dataset.zip((ds, counter))
ds = ds.prefetch(buffer_size=batch_size)
if datasets_use_caching:
ds = ds.cache()
if train:
buffer_size = 10000
mlperf.logger.log(key=mlperf.tags.INPUT_SHARD, value=buffer_size)
ds = ds.apply(
tf.data.experimental.shuffle_and_repeat(buffer_size=buffer_size))
else:
ds = ds.repeat()
ds = ds.apply(
tf.data.experimental.map_and_batch(
map_func=self.parse_and_preprocess,
batch_size=batch_size_per_split,
num_parallel_batches=num_splits))
ds = ds.prefetch(buffer_size=num_splits)
if num_threads:
options = tf.data.Options()
options.experimental_threading.private_threadpool_size = num_threads
ds = ds.with_options(options)
return ds
class RecordInputImagePreprocessor(BaseImagePreprocessor):
"""Preprocessor for images with RecordInput format."""
def preprocess(self, image_buffer, bbox, batch_position):
"""Preprocessing image_buffer as a function of its batch position."""
if self.train:
image = train_image(image_buffer, self.height, self.width, bbox,
batch_position, self.resize_method, self.distortions,
None, summary_verbosity=self.summary_verbosity,
distort_color_in_yiq=self.distort_color_in_yiq,
fuse_decode_and_crop=self.fuse_decode_and_crop)
else:
image = tf.image.decode_jpeg(
image_buffer, channels=3, dct_method='INTEGER_FAST')
image = eval_image(image, self.height, self.width, batch_position,
self.resize_method,
summary_verbosity=self.summary_verbosity)
# Note: image is now float32 [height,width,3] with range [0, 255]
# image = tf.cast(image, tf.uint8) # HACK TESTING
if self.match_mlperf:
mlperf.logger.log(key=mlperf.tags.INPUT_MEAN_SUBTRACTION,
value=_CHANNEL_MEANS)
normalized = image - _CHANNEL_MEANS
else:
normalized = normalized_image(image)
return tf.cast(normalized, self.dtype)
def minibatch(self,
dataset,
subset,
params,
shift_ratio=-1):
if shift_ratio < 0:
shift_ratio = self.shift_ratio
with tf.name_scope('batch_processing'):
# Build final results per split.
images = [[] for _ in range(self.num_splits)]
labels = [[] for _ in range(self.num_splits)]
if params.use_datasets:
ds = self.create_dataset(
self.batch_size, self.num_splits, self.batch_size_per_split,
dataset, subset, self.train,
datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
num_threads=params.datasets_num_private_threads,
datasets_use_caching=params.datasets_use_caching,
datasets_parallel_interleave_cycle_length=(
params.datasets_parallel_interleave_cycle_length),
datasets_sloppy_parallel_interleave=(
params.datasets_sloppy_parallel_interleave),
datasets_parallel_interleave_prefetch=(
params.datasets_parallel_interleave_prefetch))
ds_iterator = self.create_iterator(ds)
for d in xrange(self.num_splits):
images[d], labels[d] = ds_iterator.get_next()
# TODO(laigd): consider removing the --use_datasets option, it should
# always use datasets.
else:
record_input = data_flow_ops.RecordInput(
file_pattern=dataset.tf_record_pattern(subset),
seed=301,
parallelism=64,
buffer_size=10000,
batch_size=self.batch_size,
shift_ratio=shift_ratio,
name='record_input')
records = record_input.get_yield_op()
records = tf.split(records, self.batch_size, 0)
records = [tf.reshape(record, []) for record in records]
for idx in xrange(self.batch_size):
value = records[idx]
(image, label) = self.parse_and_preprocess(value, idx)
split_index = idx % self.num_splits
labels[split_index].append(label)
images[split_index].append(image)
for split_index in xrange(self.num_splits):
if not params.use_datasets:
images[split_index] = tf.parallel_stack(images[split_index])
labels[split_index] = tf.concat(labels[split_index], 0)
images[split_index] = tf.reshape(
images[split_index],
shape=[self.batch_size_per_split, self.height, self.width,
self.depth])
labels[split_index] = tf.reshape(labels[split_index],
[self.batch_size_per_split])
return images, labels
def supports_datasets(self):
return True
class ImagenetPreprocessor(RecordInputImagePreprocessor):
def preprocess(self, image_buffer, bbox, batch_position):
# pylint: disable=g-import-not-at-top
try:
from official.r1.resnet.imagenet_preprocessing import preprocess_image
except ImportError:
tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.')
raise
if self.train:
image = preprocess_image(
image_buffer, bbox, self.height, self.width, self.depth,
is_training=True)
else:
image = preprocess_image(
image_buffer, bbox, self.height, self.width, self.depth,
is_training=False)
return tf.cast(image, self.dtype)
class Cifar10ImagePreprocessor(BaseImagePreprocessor):
"""Preprocessor for Cifar10 input images."""
def _distort_image(self, image):
"""Distort one image for training a network.
    We adopt the standard data augmentation scheme that is widely used for
    this dataset: images are first zero-padded with 4 pixels on each side,
    then randomly cropped back to their original size; half of the images
    are then horizontally mirrored.
Args:
image: input image.
Returns:
distorted image.
"""
image = tf.image.resize_image_with_crop_or_pad(
image, self.height + 8, self.width + 8)
distorted_image = tf.random_crop(image,
[self.height, self.width, self.depth])
# Randomly flip the image horizontally.
distorted_image = tf.image.random_flip_left_right(distorted_image)
if self.summary_verbosity >= 3:
tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0))
return distorted_image
def _eval_image(self, image):
"""Get the image for model evaluation."""
distorted_image = tf.image.resize_image_with_crop_or_pad(
image, self.width, self.height)
if self.summary_verbosity >= 3:
tf.summary.image('cropped.image', tf.expand_dims(distorted_image, 0))
return distorted_image
def preprocess(self, raw_image):
"""Preprocessing raw image."""
if self.summary_verbosity >= 3:
tf.summary.image('raw.image', tf.expand_dims(raw_image, 0))
if self.train and self.distortions:
image = self._distort_image(raw_image)
else:
image = self._eval_image(raw_image)
normalized = normalized_image(image)
return tf.cast(normalized, self.dtype)
def minibatch(self,
dataset,
subset,
params,
shift_ratio=-1):
# TODO(jsimsa): Implement datasets code path
del shift_ratio, params
with tf.name_scope('batch_processing'):
all_images, all_labels = dataset.read_data_files(subset)
all_images = tf.constant(all_images)
all_labels = tf.constant(all_labels)
input_image, input_label = tf.train.slice_input_producer(
[all_images, all_labels])
input_image = tf.cast(input_image, self.dtype)
input_label = tf.cast(input_label, tf.int32)
# Ensure that the random shuffling has good mixing properties.
min_fraction_of_examples_in_queue = 0.4
min_queue_examples = int(dataset.num_examples_per_epoch(subset) *
min_fraction_of_examples_in_queue)
raw_images, raw_labels = tf.train.shuffle_batch(
[input_image, input_label], batch_size=self.batch_size,
capacity=min_queue_examples + 3 * self.batch_size,
min_after_dequeue=min_queue_examples)
images = [[] for i in range(self.num_splits)]
labels = [[] for i in range(self.num_splits)]
# Create a list of size batch_size, each containing one image of the
# batch. Without the unstack call, raw_images[i] would still access the
# same image via a strided_slice op, but would be slower.
raw_images = tf.unstack(raw_images, axis=0)
raw_labels = tf.unstack(raw_labels, axis=0)
for i in xrange(self.batch_size):
split_index = i % self.num_splits
# The raw image read from data has the format [depth, height, width]
# reshape to the format returned by minibatch.
raw_image = tf.reshape(raw_images[i],
[dataset.depth, dataset.height, dataset.width])
raw_image = tf.transpose(raw_image, [1, 2, 0])
image = self.preprocess(raw_image)
images[split_index].append(image)
labels[split_index].append(raw_labels[i])
for split_index in xrange(self.num_splits):
images[split_index] = tf.parallel_stack(images[split_index])
labels[split_index] = tf.parallel_stack(labels[split_index])
return images, labels
class COCOPreprocessor(BaseImagePreprocessor):
"""Preprocessor for COCO dataset input images, boxes, and labels."""
def minibatch(self,
dataset,
subset,
params,
shift_ratio=-1):
del shift_ratio # Not used when using datasets instead of data_flow_ops
with tf.name_scope('batch_processing'):
ds = self.create_dataset(
batch_size=self.batch_size,
num_splits=self.num_splits,
batch_size_per_split=self.batch_size_per_split,
dataset=dataset,
subset=subset,
train=self.train,
datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
num_threads=params.datasets_num_private_threads,
datasets_use_caching=params.datasets_use_caching,
datasets_parallel_interleave_cycle_length=(
params.datasets_parallel_interleave_cycle_length),
datasets_sloppy_parallel_interleave=(
params.datasets_sloppy_parallel_interleave),
datasets_parallel_interleave_prefetch=(
params.datasets_parallel_interleave_prefetch))
ds_iterator = self.create_iterator(ds)
# Training data: 4 tuple
# Validation data: 5 tuple
# See get_input_shapes in models/ssd_model.py for details.
input_len = 4 if subset == 'train' else 5
input_lists = [[None for _ in range(self.num_splits)]
for _ in range(input_len)]
for d in xrange(self.num_splits):
input_list = ds_iterator.get_next()
for i in range(input_len):
input_lists[i][d] = input_list[i]
return input_lists
def preprocess(self, data):
try:
import ssd_dataloader # pylint: disable=g-import-not-at-top
import ssd_constants # pylint: disable=g-import-not-at-top
from object_detection.core import preprocessor # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation')
image_buffer = data['image_buffer']
boxes = data['groundtruth_boxes']
classes = tf.reshape(data['groundtruth_classes'], [-1, 1])
source_id = tf.string_to_number(data['source_id'])
raw_shape = data['raw_shape']
ssd_encoder = ssd_dataloader.Encoder()
# Only 80 of the 90 COCO classes are used.
class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP)
classes = tf.gather(class_map, classes)
classes = tf.cast(classes, dtype=tf.float32)
if self.train:
image, boxes, classes = ssd_dataloader.ssd_decode_and_crop(
image_buffer, boxes, classes, raw_shape)
      # ssd_decode_and_crop resizes and returns a float32 image without
      # changing its range (i.e., values are still in [0, 255]). Dividing by
      # 255 converts it to the [0, 1] range. We do not do this before cropping,
      # to avoid a dtype cast (which would incur an additional memory copy).
image /= 255.
image, boxes = preprocessor.random_horizontal_flip(
image=image, boxes=boxes)
# Random horizontal flip probability is 50%
# See https://github.com/tensorflow/models/blob/master/research/object_detection/core/preprocessor.py # pylint: disable=line-too-long
mlperf.logger.log(key=mlperf.tags.RANDOM_FLIP_PROBABILITY, value=0.5)
image = tf.cast(image, self.dtype)
encoded_returns = ssd_encoder.encode_labels(boxes, classes)
encoded_classes, encoded_boxes, num_matched_boxes = encoded_returns
# Shape of image: [width, height, channel]
# Shape of encoded_boxes: [NUM_SSD_BOXES, 4]
# Shape of encoded_classes: [NUM_SSD_BOXES, 1]
# Shape of num_matched_boxes: [1]
return (image, encoded_boxes, encoded_classes, num_matched_boxes)
else:
image = tf.image.decode_jpeg(image_buffer)
image = tf.image.resize_images(
image, size=(ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE))
# resize_image returns image of dtype float32 and does not change its
# range. Divide by 255 to convert image to [0, 1] range.
image /= 255.
image = ssd_dataloader.normalize_image(image)
image = tf.cast(image, self.dtype)
def trim_and_pad(inp_tensor):
"""Limit the number of boxes, and pad if necessary."""
inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES]
num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(inp_tensor)[0]
inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
return tf.reshape(inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES,
inp_tensor.get_shape()[1]])
boxes, classes = trim_and_pad(boxes), trim_and_pad(classes)
# Shape of boxes: [MAX_NUM_EVAL_BOXES, 4]
# Shape of classes: [MAX_NUM_EVAL_BOXES, 1]
# Shape of source_id: [] (scalar tensor)
# Shape of raw_shape: [3]
return (image, boxes, classes, source_id, raw_shape)
def create_dataset(self,
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train,
datasets_repeat_cached_sample,
num_threads=None,
datasets_use_caching=False,
datasets_parallel_interleave_cycle_length=None,
datasets_sloppy_parallel_interleave=False,
datasets_parallel_interleave_prefetch=None):
"""Creates a dataset for the benchmark."""
try:
import ssd_dataloader # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation')
assert self.supports_datasets()
glob_pattern = dataset.tf_record_pattern(subset)
ds = tf.data.TFRecordDataset.list_files(glob_pattern, shuffle=train)
# TODO(haoyuzhang): Enable map+filter fusion after cl/218399112 in release
# options = tf.data.Options()
# options.experimental_optimization = tf.data.experimental.OptimizationOptions() # pylint: disable=line-too-long
# options.experimental_optimization.map_and_filter_fusion = True
# ds = ds.with_options(options)
ds = ds.apply(
tf.data.experimental.parallel_interleave(
tf.data.TFRecordDataset,
cycle_length=datasets_parallel_interleave_cycle_length or 10,
sloppy=datasets_sloppy_parallel_interleave))
mlperf.logger.log(key=mlperf.tags.INPUT_ORDER)
if datasets_repeat_cached_sample:
# Repeat a single sample element indefinitely to emulate memory-speed IO.
ds = ds.take(1).cache().repeat()
ds = ds.prefetch(buffer_size=batch_size)
if datasets_use_caching:
ds = ds.cache()
if train:
ds = ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=10000))
mlperf.logger.log(key=mlperf.tags.INPUT_SHARD, value=10000)
mlperf.logger.log(key=mlperf.tags.INPUT_ORDER)
else:
ds = ds.repeat()
ds = ds.map(ssd_dataloader.ssd_parse_example_proto, num_parallel_calls=64)
ds = ds.filter(
lambda data: tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0))
ds = ds.apply(
tf.data.experimental.map_and_batch(
map_func=self.preprocess,
batch_size=batch_size_per_split,
num_parallel_batches=num_splits,
drop_remainder=train))
ds = ds.prefetch(buffer_size=num_splits)
if num_threads:
options = tf.data.Options()
options.experimental_threading.private_threadpool_size = num_threads
ds = ds.with_options(options)
return ds
def supports_datasets(self):
return True
class TestImagePreprocessor(BaseImagePreprocessor):
"""Preprocessor used for testing.
set_fake_data() sets which images and labels will be output by minibatch(),
and must be called before minibatch(). This allows tests to easily specify
a set of images to use for training, without having to create any files.
Queue runners must be started for this preprocessor to work.
"""
def __init__(self,
batch_size,
output_shapes,
num_splits,
dtype,
train=None,
distortions=None,
resize_method=None,
shift_ratio=0,
summary_verbosity=0,
distort_color_in_yiq=False,
fuse_decode_and_crop=False,
match_mlperf=False):
super(TestImagePreprocessor, self).__init__(
batch_size, output_shapes, num_splits, dtype, train, distortions,
resize_method, shift_ratio, summary_verbosity=summary_verbosity,
distort_color_in_yiq=distort_color_in_yiq,
fuse_decode_and_crop=fuse_decode_and_crop, match_mlperf=match_mlperf)
self.expected_subset = None
def set_fake_data(self, fake_images, fake_labels):
assert len(fake_images.shape) == 4
assert len(fake_labels.shape) == 1
num_images = fake_images.shape[0]
assert num_images == fake_labels.shape[0]
assert num_images % self.batch_size == 0
self.fake_images = fake_images
self.fake_labels = fake_labels
def minibatch(self,
dataset,
subset,
params,
shift_ratio=0):
"""Get test image batches."""
del dataset, params
if (not hasattr(self, 'fake_images') or
not hasattr(self, 'fake_labels')):
raise ValueError('Must call set_fake_data() before calling minibatch '
'on TestImagePreprocessor')
if self.expected_subset is not None:
assert subset == self.expected_subset
shift_ratio = shift_ratio or self.shift_ratio
fake_images = cnn_util.roll_numpy_batches(self.fake_images, self.batch_size,
shift_ratio)
fake_labels = cnn_util.roll_numpy_batches(self.fake_labels, self.batch_size,
shift_ratio)
with tf.name_scope('batch_processing'):
image_slice, label_slice = tf.train.slice_input_producer(
[fake_images, fake_labels],
shuffle=False,
name='image_slice')
raw_images, raw_labels = tf.train.batch(
[image_slice, label_slice], batch_size=self.batch_size,
name='image_batch')
images = [[] for _ in range(self.num_splits)]
labels = [[] for _ in range(self.num_splits)]
for i in xrange(self.batch_size):
split_index = i % self.num_splits
raw_image = tf.cast(raw_images[i], self.dtype)
images[split_index].append(raw_image)
labels[split_index].append(raw_labels[i])
for split_index in xrange(self.num_splits):
images[split_index] = tf.parallel_stack(images[split_index])
labels[split_index] = tf.parallel_stack(labels[split_index])
normalized = [normalized_image(part) for part in images]
return [[tf.cast(part, self.dtype) for part in normalized], labels]
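# Usage sketch (illustration only, not part of the upstream file): exercises
# TestImagePreprocessor as its docstring describes. The (16, 8, 8, 3) image
# array, label values, and num_splits are assumptions chosen for the sketch;
# queue runners must be started before evaluating the returned tensors.
def _example_test_image_preprocessor():
  import numpy as np  # Local import to keep the sketch self-contained.
  fake_images = np.zeros((16, 8, 8, 3), dtype=np.float32)
  fake_labels = np.arange(16, dtype=np.int32)
  preprocessor = TestImagePreprocessor(
      batch_size=16, output_shapes=[(16, 8, 8, 3), (16,)],
      num_splits=2, dtype=tf.float32)
  preprocessor.set_fake_data(fake_images, fake_labels)
  return preprocessor.minibatch(dataset=None, subset='train', params=None)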
class LibrispeechPreprocessor(InputPreprocessor):
"""Preprocessor for librispeech class for all image model preprocessors."""
def __init__(self, batch_size, output_shapes, num_splits, dtype, train,
**kwargs):
del kwargs
super(LibrispeechPreprocessor, self).__init__(batch_size, output_shapes)
self.num_splits = num_splits
self.dtype = dtype
self.is_train = train
if self.batch_size % self.num_splits != 0:
raise ValueError(('batch_size must be a multiple of num_splits: '
'batch_size %d, num_splits: %d') % (self.batch_size,
self.num_splits))
self.batch_size_per_split = self.batch_size // self.num_splits
def create_dataset(self,
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train,
datasets_repeat_cached_sample,
num_threads=None,
datasets_use_caching=False,
datasets_parallel_interleave_cycle_length=None,
datasets_sloppy_parallel_interleave=False,
datasets_parallel_interleave_prefetch=None):
"""Creates a dataset for the benchmark."""
# TODO(laigd): currently the only difference between this and the one in
# BaseImagePreprocessor is, this uses map() and padded_batch() while the
# latter uses tf.data.experimental.map_and_batch(). Try to merge them.
assert self.supports_datasets()
glob_pattern = dataset.tf_record_pattern(subset)
file_names = gfile.Glob(glob_pattern)
if not file_names:
raise ValueError('Found no files in --data_dir matching: {}'
.format(glob_pattern))
ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=train)
ds = ds.apply(
tf.data.experimental.parallel_interleave(
tf.data.TFRecordDataset,
cycle_length=datasets_parallel_interleave_cycle_length or 10,
sloppy=datasets_sloppy_parallel_interleave,
prefetch_input_elements=datasets_parallel_interleave_prefetch))
if datasets_repeat_cached_sample:
# Repeat a single sample element indefinitely to emulate memory-speed IO.
ds = ds.take(1).cache().repeat()
counter = tf.data.Dataset.range(batch_size)
counter = counter.repeat()
ds = tf.data.Dataset.zip((ds, counter))
ds = ds.prefetch(buffer_size=batch_size)
if datasets_use_caching:
ds = ds.cache()
if train:
ds = ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=10000))
else:
ds = ds.repeat()
ds = ds.map(map_func=self.parse_and_preprocess,
num_parallel_calls=batch_size_per_split*num_splits)
ds = ds.padded_batch(
batch_size=batch_size_per_split,
padded_shapes=tuple([
tf.TensorShape(output_shape[1:])
for output_shape in self.output_shapes
]),
drop_remainder=True)
ds = ds.prefetch(buffer_size=num_splits)
if num_threads:
options = tf.data.Options()
options.experimental_threading.private_threadpool_size = num_threads
ds = ds.with_options(options)
return ds
def minibatch(self, dataset, subset, params, shift_ratio=-1):
assert params.use_datasets
# TODO(laigd): unify this with CNNModel's minibatch()
# TODO(laigd): in distributed mode we use shift_ratio so different workers
# won't work on same inputs, so we should respect that.
del shift_ratio
with tf.name_scope('batch_processing'):
ds = self.create_dataset(
self.batch_size,
self.num_splits,
self.batch_size_per_split,
dataset,
subset,
self.is_train,
datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
num_threads=params.datasets_num_private_threads,
datasets_use_caching=params.datasets_use_caching,
datasets_parallel_interleave_cycle_length=(
params.datasets_parallel_interleave_cycle_length),
datasets_sloppy_parallel_interleave=(
params.datasets_sloppy_parallel_interleave),
datasets_parallel_interleave_prefetch=(
params.datasets_parallel_interleave_prefetch))
ds_iterator = self.create_iterator(ds)
# The four lists are: input spectrogram feature, labels, input lengths,
# label lengths
input_lists = [[None for _ in range(self.num_splits)] for _ in range(4)]
for d in xrange(self.num_splits):
input_list = ds_iterator.get_next()
for i in range(4):
input_lists[i][d] = input_list[i]
assert self.output_shapes == [
input_lists[i][0].shape.as_list() for i in range(4)
]
return tuple(input_lists)
def supports_datasets(self):
return True
def parse_and_preprocess(self, value, batch_position):
"""Parse an TFRecord."""
del batch_position
assert self.supports_datasets()
context_features = {
'labels': tf.VarLenFeature(dtype=tf.int64),
'input_length': tf.FixedLenFeature([], dtype=tf.int64),
'label_length': tf.FixedLenFeature([], dtype=tf.int64),
}
sequence_features = {
'features': tf.FixedLenSequenceFeature([161], dtype=tf.float32)
}
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=value,
context_features=context_features,
sequence_features=sequence_features,
)
return [
# Input
tf.expand_dims(sequence_parsed['features'], axis=2),
# Label
tf.cast(
tf.reshape(
tf.sparse_tensor_to_dense(context_parsed['labels']), [-1]),
dtype=tf.int32),
# Input length
tf.cast(
tf.reshape(context_parsed['input_length'], [1]),
dtype=tf.int32),
# Label length
tf.cast(
tf.reshape(context_parsed['label_length'], [1]),
dtype=tf.int32),
]
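# Usage sketch (illustration only, not part of the upstream file): builds a
# SequenceExample matching the context/sequence features parsed above. The
# label ids, lengths, and zero-valued 161-bin spectrogram frames are made up.
def _example_librispeech_sequence_example():
  context = tf.train.Features(feature={
      'labels': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[7, 2, 9])),
      'input_length': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[5])),
      'label_length': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[3])),
  })
  feature_lists = tf.train.FeatureLists(feature_list={
      'features': tf.train.FeatureList(feature=[
          tf.train.Feature(float_list=tf.train.FloatList(value=[0.0] * 161))
          for _ in range(5)
      ])
  })
  return tf.train.SequenceExample(
      context=context, feature_lists=feature_lists).SerializeToString()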
#!/bin/bash
source /public/home/qianyj/virtualenv/dtk21.10.1/dtk21.10.1_tf1.15/venv/bin/activate
export ROCM_PATH=/public/home/qianyj/package/dtk-21.10.1/dtk-21.10.1
export HIP_PATH=${ROCM_PATH}/hip
export CPACK_INSTALL_PREFIX=$ROCM_PATH
export AMDGPU_TARGETS="gfx900;gfx906"
export PATH=${ROCM_PATH}/bin:${ROCM_PATH}/llvm/bin:${ROCM_PATH}/hip/bin:$PATH
export LD_LIBRARY_PATH=${ROCM_PATH}/lib:${ROCM_PATH}/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=${ROCM_PATH}/hip/lib:${ROCM_PATH}/llvm/lib:$LD_LIBRARY_PATH
export C_INCLUDE_PATH=${ROCM_PATH}/include:${ROCM_PATH}/llvm/include${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}}
export CPLUS_INCLUDE_PATH=${ROCM_PATH}/include:${ROCM_PATH}/llvm/include${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}}
export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=3
export TF_CPP_MIN_VLOG_LEVEL=2
HIP_VISIBLE_DEVICES=0,1,2,3 numactl --cpunodebind=0,1,2,3 --membind=0,1,2,3 \
  nohup python3 tf_cnn_benchmarks.py \
    --data_format=NCHW \
    --batch_size=128 \
    --model=resnet50 \
    --save_model_steps=20000 \
    --optimizer=momentum \
    --variable_update=replicated \
    --print_training_accuracy=true \
    --eval_during_training_every_n_epochs=1 \
    --nodistortions \
    --num_gpus=4 \
    --num_epochs=90 \
    --weight_decay=1e-4 \
    --data_dir=/public/software/apps/DeepLearning/Data/ImageNet-tensorflow/ \
    --use_fp16=False \
    --data_name=imagenet \
    --train_dir=/public/home/qianyj/TF_test/dtk21.10.1/tf1.15/benchmarks-master/scripts/checkpoint \
    >logfile 2>&1 &
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs the tf_cnn_benchmarks tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import unittest
from absl import app
from absl import flags as absl_flags
import tensorflow.compat.v1 as tf
import all_reduce_benchmark_test
import allreduce_test
import benchmark_cnn_distributed_test
import benchmark_cnn_test
import cnn_util_test
import variable_mgr_util_test
from models import model_config
# Ideally, we wouldn't need this option, and run both distributed tests and non-
# distributed tests. But, TensorFlow allocates all the GPU memory by default, so
# the non-distributed tests allocate all the GPU memory. The distributed tests
# spawn processes that run TensorFlow, and cannot run if all the GPU memory is
# already allocated. If a non-distributed test is run, then a distributed test
# is run in the same process, the distributed test will fail because there is no
# more GPU memory for the spawned processes to allocate.
absl_flags.DEFINE_boolean('run_distributed_tests', False,
'If True, run the distributed tests. If False, the'
'non-distributed tests.')
absl_flags.DEFINE_boolean('full_tests', False,
'If True, all distributed or non-distributed tests '
'are run, which can take hours. If False, only a '
'subset of tests will be run. This subset runs much '
                          'faster and tests almost all of the functionality of '
                          'the full set of tests, so it is recommended to keep '
'this option set to False.')
FLAGS = absl_flags.FLAGS
def main(_):
loader = unittest.defaultTestLoader
if FLAGS.full_tests:
suite = unittest.TestSuite([
loader.loadTestsFromModule(allreduce_test),
loader.loadTestsFromModule(cnn_util_test),
loader.loadTestsFromModule(variable_mgr_util_test),
loader.loadTestsFromModule(benchmark_cnn_test),
loader.loadTestsFromModule(all_reduce_benchmark_test),
])
if model_config.can_import_contrib:
from models.tf1_only import nasnet_test # pylint: disable=g-import-not-at-top
suite.addTest(loader.loadTestsFromModule(nasnet_test))
dist_suite = unittest.TestSuite([
loader.loadTestsFromModule(benchmark_cnn_distributed_test),
])
else:
suite = unittest.TestSuite([
loader.loadTestsFromModule(allreduce_test),
loader.loadTestsFromModule(cnn_util_test),
loader.loadTestsFromModule(all_reduce_benchmark_test),
loader.loadTestsFromModule(variable_mgr_util_test),
loader.loadTestsFromTestCase(benchmark_cnn_test.TestAlexnetModel),
loader.loadTestsFromTestCase(benchmark_cnn_test.TfCnnBenchmarksTest),
loader.loadTestsFromTestCase(benchmark_cnn_test.VariableUpdateTest),
loader.loadTestsFromTestCase(
benchmark_cnn_test.VariableMgrLocalReplicatedTest),
])
dist_suite = unittest.TestSuite([
loader.loadTestsFromNames([
'benchmark_cnn_distributed_test.DistributedVariableUpdateTest'
'.testVarUpdateDefault',
'benchmark_cnn_distributed_test.TfCnnBenchmarksDistributedTest'
'.testParameterServer',
]),
])
if FLAGS.run_distributed_tests:
print('Running distributed tests')
result = unittest.TextTestRunner(verbosity=2).run(dist_suite)
else:
print('Running non-distributed tests')
result = unittest.TextTestRunner(verbosity=2).run(suite)
sys.exit(not result.wasSuccessful())
if __name__ == '__main__':
tf.disable_v2_behavior()
app.run(main)
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Central location for all constants related to MLPerf SSD."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# ==============================================================================
# == Model =====================================================================
# ==============================================================================
IMAGE_SIZE = 300
# TODO(taylorrobie): MLPerf uses 80, but COCO documents 90. (RetinaNet uses 90)
# Update(taylorrobie): Labels > 81 show up in the pipeline. This will need to
# be resolved.
NUM_CLASSES = 81 # Including "no class". Not all COCO classes are used.
# Note: Zero is special. (Background class) CLASS_INV_MAP[0] must be zero.
CLASS_INV_MAP = (
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87,
88, 89, 90)
_MAP = {j: i for i, j in enumerate(CLASS_INV_MAP)}
CLASS_MAP = tuple(_MAP.get(i, -1) for i in range(max(CLASS_INV_MAP) + 1))
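# Illustrative check (not part of the original file): COCO label ids are not
# contiguous, so CLASS_MAP compacts them and marks unused ids with -1. For
# example, CLASS_INV_MAP[12] == 13, hence:
#   assert CLASS_MAP[13] == 12  # COCO id 13 -> contiguous index 12
#   assert CLASS_MAP[12] == -1  # COCO id 12 is unused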
NUM_SSD_BOXES = 8732
RESNET_DEPTH = 34
"""SSD specific"""
MIN_LEVEL = 3
MAX_LEVEL = 8
FEATURE_SIZES = (38, 19, 10, 5, 3, 1)
STEPS = (8, 16, 32, 64, 100, 300)
# https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
SCALES = (21, 45, 99, 153, 207, 261, 315)
ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2,), (2,))
NUM_DEFAULTS = (4, 6, 6, 6, 4, 4)
NUM_DEFAULTS_BY_LEVEL = {3: 4, 4: 6, 5: 6, 6: 6, 7: 4, 8: 4}
SCALE_XY = 0.1
SCALE_HW = 0.2
BOX_CODER_SCALES = (1 / SCALE_XY, 1 / SCALE_XY, 1 / SCALE_HW, 1 / SCALE_HW)
MATCH_THRESHOLD = 0.5
# https://discuss.pytorch.org/t/how-to-preprocess-input-for-pre-trained-networks/683
NORMALIZATION_MEAN = (0.485, 0.456, 0.406)
NORMALIZATION_STD = (0.229, 0.224, 0.225)
# SSD Cropping
NUM_CROP_PASSES = 50
CROP_MIN_IOU_CHOICES = (0, 0.1, 0.3, 0.5, 0.7, 0.9)
P_NO_CROP_PER_PASS = 1 / (len(CROP_MIN_IOU_CHOICES) + 1)
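# Worked example: with the six IoU choices above, each cropping pass skips
# cropping with probability 1 / (6 + 1) = 1/7 ~= 0.143.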
# Hard example mining
NEGS_PER_POSITIVE = 3
# Batch normalization
BATCH_NORM_DECAY = 0.997
BATCH_NORM_EPSILON = 1e-4
# ==============================================================================
# == Optimizer =================================================================
# ==============================================================================
LEARNING_RATE_SCHEDULE = (
(0, 1e-3),
(160000, 1e-4),
(200000, 1e-5),
)
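# Illustrative reading of the schedule above: the learning rate is 1e-3 from
# step 0, drops to 1e-4 at step 160000, and to 1e-5 at step 200000.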
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4
# ==============================================================================
# == Keys ======================================================================
# ==============================================================================
BOXES = "boxes"
CLASSES = "classes"
NUM_MATCHED_BOXES = "num_matched_boxes"
IMAGE = "image"
SOURCE_ID = "source_id"
RAW_SHAPE = "raw_shape"
PRED_BOXES = "pred_boxes"
PRED_SCORES = "pred_scores"
# ==============================================================================
# == Evaluation ================================================================
# ==============================================================================
# Note: This is based on a batch size of 32
# https://github.com/mlperf/reference/blob/master/single_stage_detector/ssd/train.py#L21-L37
CHECKPOINT_FREQUENCY = 20000
MAX_NUM_EVAL_BOXES = 200
OVERLAP_CRITERIA = 0.5  # Used for non-max suppression
MIN_SCORE = 0.05 # Minimum score to be considered during evaluation.
DUMMY_SCORE = -1e5 # If no boxes are matched.
ANNOTATION_FILE = "annotations/instances_val2017.json"
COCO_NUM_TRAIN_IMAGES = 118287
COCO_NUM_VAL_IMAGES = 4952
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data loader and processing."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import itertools as it
import math
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
from object_detection.core import target_assigner
from object_detection.matchers import argmax_matcher
import mlperf
import ssd_constants
class DefaultBoxes(object):
"""Default bounding boxes for 300x300 5 layer SSD.
Default bounding boxes generation follows the order of (W, H, anchor_sizes).
Therefore, the tensor converted from DefaultBoxes has a shape of
[anchor_sizes, H, W, 4]. The last dimension is the box coordinates; 'ltrb'
is [ymin, xmin, ymax, xmax] while 'xywh' is [cy, cx, h, w].
"""
def __init__(self):
fk = ssd_constants.IMAGE_SIZE / np.array(ssd_constants.STEPS)
self.default_boxes = []
    # Generate default boxes for each feature map resolution.
for idx, feature_size in enumerate(ssd_constants.FEATURE_SIZES):
sk1 = ssd_constants.SCALES[idx] / ssd_constants.IMAGE_SIZE
sk2 = ssd_constants.SCALES[idx+1] / ssd_constants.IMAGE_SIZE
sk3 = math.sqrt(sk1*sk2)
all_sizes = [(sk1, sk1), (sk3, sk3)]
for alpha in ssd_constants.ASPECT_RATIOS[idx]:
w, h = sk1 * math.sqrt(alpha), sk1 / math.sqrt(alpha)
all_sizes.append((w, h))
all_sizes.append((h, w))
assert len(all_sizes) == ssd_constants.NUM_DEFAULTS[idx]
for w, h in all_sizes:
for i, j in it.product(range(feature_size), repeat=2):
cx, cy = (j + 0.5) / fk[idx], (i + 0.5) / fk[idx]
box = tuple(np.clip(k, 0, 1) for k in (cy, cx, h, w))
self.default_boxes.append(box)
assert len(self.default_boxes) == ssd_constants.NUM_SSD_BOXES
mlperf.logger.log(key=mlperf.tags.FEATURE_SIZES,
value=ssd_constants.FEATURE_SIZES)
mlperf.logger.log(key=mlperf.tags.STEPS,
value=ssd_constants.STEPS)
mlperf.logger.log(key=mlperf.tags.SCALES,
value=ssd_constants.SCALES)
mlperf.logger.log(key=mlperf.tags.ASPECT_RATIOS,
value=ssd_constants.ASPECT_RATIOS)
mlperf.logger.log(key=mlperf.tags.NUM_DEFAULTS,
value=ssd_constants.NUM_SSD_BOXES)
def to_ltrb(cy, cx, h, w):
return cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2
# For IoU calculation
self.default_boxes_ltrb = tuple(to_ltrb(*i) for i in self.default_boxes)
  def __call__(self, order='ltrb'):
    if order == 'ltrb': return self.default_boxes_ltrb
    if order == 'xywh': return self.default_boxes
    raise ValueError('Unsupported box order: {}'.format(order))
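# Minimal usage sketch (illustrative, not part of the original file):
#
#   default_boxes = DefaultBoxes()
#   ltrb = default_boxes('ltrb')  # 8732 tuples of (ymin, xmin, ymax, xmax)
#   xywh = default_boxes('xywh')  # 8732 tuples of (cy, cx, h, w)
#   assert len(ltrb) == ssd_constants.NUM_SSD_BOXES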
def calc_iou_tensor(boxes1, boxes2):
"""Calculation of IoU based on two boxes tensor.
Reference to https://github.com/kuangliu/pytorch-ssd
Args:
boxes1: shape (N, 4), four coordinates of N boxes
boxes2: shape (M, 4), four coordinates of M boxes
Returns:
IoU: shape (N, M), IoU of the i-th box in `boxes1` and j-th box in `boxes2`
"""
b1_left, b1_top, b1_right, b1_bottom = tf.split(boxes1, 4, axis=1)
b2_left, b2_top, b2_right, b2_bottom = tf.split(boxes2, 4, axis=1)
# Shape of intersect_* (N, M)
intersect_left = tf.maximum(b1_left, tf.transpose(b2_left))
intersect_top = tf.maximum(b1_top, tf.transpose(b2_top))
intersect_right = tf.minimum(b1_right, tf.transpose(b2_right))
intersect_bottom = tf.minimum(b1_bottom, tf.transpose(b2_bottom))
boxes1_area = (b1_right - b1_left) * (b1_bottom - b1_top)
boxes2_area = (b2_right - b2_left) * (b2_bottom - b2_top)
intersect = tf.multiply(tf.maximum((intersect_right - intersect_left), 0),
tf.maximum((intersect_bottom - intersect_top), 0))
union = boxes1_area + tf.transpose(boxes2_area) - intersect
iou = intersect / union
return iou
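# Worked example (illustrative): for boxes1 = [[0, 0, 1, 1]] and
# boxes2 = [[0.5, 0, 1.5, 1]], the intersection area is 0.5 and the union is
# 1 + 1 - 0.5 = 1.5, so the returned IoU is 0.5 / 1.5 = 1/3.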
def ssd_parse_example_proto(example_serialized):
"""Parses an Example proto containing a training example of an image.
Each Example proto contains the following fields that we care about:
image/encoded: <JPEG encoded string>
image/source_id: tf.string
image/height: tf.int64
image/width: tf.int64
image/object/bbox/xmin: tf.VarLenFeature(tf.float32)
image/object/bbox/xmax: tf.VarLenFeature(tf.float32)
    image/object/bbox/ymin: tf.VarLenFeature(tf.float32)
image/object/bbox/ymax: tf.VarLenFeature(tf.float32)
image/object/class/label: tf.VarLenFeature(tf.int64)
image/object/class/text: tf.VarLenFeature(tf.string)
Complete decoder can be found in:
https://github.com/tensorflow/models/blob/master/research/object_detection/data_decoders/tf_example_decoder.py
Args:
example_serialized: scalar Tensor tf.string containing a serialized
Example protocol buffer.
Returns:
A dictionary with the following key-values:
image_buffer: Tensor tf.string containing the contents of a JPEG file.
groundtruth_boxes: Tensor tf.float32 of shape [num_boxes, 4], containing
coordinates of object bounding boxes.
    groundtruth_classes: Tensor tf.int64 of shape [num_boxes, 1], containing
class labels of objects.
source_id: unique image identifier.
raw_shape: [height, width, 3].
"""
feature_map = {
'image/encoded': tf.FixedLenFeature(
(), dtype=tf.string, default_value=''),
'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''),
'image/height': tf.FixedLenFeature((), tf.int64, default_value=1),
'image/width': tf.FixedLenFeature((), tf.int64, default_value=1),
'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
'image/object/class/label': tf.VarLenFeature(dtype=tf.int64),
}
features = tf.parse_single_example(example_serialized, feature_map)
xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 1)
ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 1)
xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 1)
ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 1)
image_buffer = features['image/encoded']
# Bounding box coordinates should be in ltrb order
boxes = tf.concat([ymin, xmin, ymax, xmax], 1)
classes = tf.expand_dims(features['image/object/class/label'].values, 1)
source_id = features['image/source_id']
raw_shape = tf.stack([features['image/height'], features['image/width'], 3])
return {'image_buffer': image_buffer,
'groundtruth_boxes': boxes,
'groundtruth_classes': classes,
'source_id': source_id,
'raw_shape': raw_shape}
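# Usage sketch (illustrative; the TFRecord path is a placeholder):
#
#   dataset = tf.data.TFRecordDataset('/path/to/coco_train.tfrecord')
#   dataset = dataset.map(ssd_parse_example_proto)
#   # Each element is now a dict with image_buffer, groundtruth_boxes,
#   # groundtruth_classes, source_id, and raw_shape.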
def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape):
"""Crop image randomly and decode the cropped region.
This function will crop an image to meet the following requirements:
1. height to width ratio between 0.5 and 2;
  2. IoUs of some boxes exceed a specified threshold;
  3. At least one box center is in the cropped region.
  We defer the JPEG decoding until after the crop to avoid wasted work.
Reference: https://github.com/chauhan-utk/ssd.DomainAdaptation
Args:
image_buffer: Tensor tf.string containing the contents of a JPEG file.
boxes: Tensor tf.float32 of shape [num_boxes, 4], containing coordinates of
object bounding boxes.
classes: Tensor tf.int64 of shape [num_boxes, 1], containing class labels
of objects.
raw_shape: [height, width, 3].
Returns:
resized_image: decoded, cropped, and resized image Tensor tf.float32 of
shape [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE, 3], value
range 0--255.
cropped_boxes: box coordinates for objects in the cropped region.
cropped_classes: class labels for objects in the cropped region.
"""
num_boxes = tf.shape(boxes)[0]
def no_crop_check():
return (tf.random_uniform(shape=(), minval=0, maxval=1, dtype=tf.float32)
< ssd_constants.P_NO_CROP_PER_PASS)
def no_crop_proposal():
return (
tf.ones((), tf.bool),
tf.convert_to_tensor([0, 0, 1, 1], dtype=tf.float32),
tf.ones((num_boxes,), tf.bool),
)
def crop_proposal():
rand_vec = lambda minval, maxval: tf.random_uniform(
shape=(ssd_constants.NUM_CROP_PASSES, 1), minval=minval, maxval=maxval,
dtype=tf.float32)
width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
left, top = rand_vec(0, 1-width), rand_vec(0, 1-height)
right = left + width
bottom = top + height
ltrb = tf.concat([left, top, right, bottom], axis=1)
min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
ious = calc_iou_tensor(ltrb, boxes)
    # Discard any bboxes whose center is not in the cropped image.
xc, yc = [tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
(ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)]
masks = tf.reduce_all(tf.stack([
tf.greater(xc, tf.tile(left, (1, num_boxes))),
tf.less(xc, tf.tile(right, (1, num_boxes))),
tf.greater(yc, tf.tile(top, (1, num_boxes))),
tf.less(yc, tf.tile(bottom, (1, num_boxes))),
], axis=2), axis=2)
    # Check whether each candidate crop is valid.
valid_aspect = tf.logical_and(tf.less(height/width, 2),
tf.less(width/height, 2))
valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1, keepdims=True)
valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)
valid_all = tf.cast(tf.reduce_all(tf.concat(
[valid_aspect, valid_ious, valid_masks], axis=1), axis=1), tf.int32)
# One indexed, as zero is needed for the case of no matches.
index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)
# Either one-hot, or zeros if there is no valid crop.
selection = tf.equal(tf.reduce_max(index * valid_all), index)
use_crop = tf.reduce_any(selection)
output_ltrb = tf.reduce_sum(tf.multiply(ltrb, tf.tile(tf.cast(
selection, tf.float32)[:, tf.newaxis], (1, 4))), axis=0)
output_masks = tf.reduce_any(tf.logical_and(masks, tf.tile(
selection[:, tf.newaxis], (1, num_boxes))), axis=0)
return use_crop, output_ltrb, output_masks
def proposal(*args):
return tf.cond(
pred=no_crop_check(),
true_fn=no_crop_proposal,
false_fn=crop_proposal,
)
_, crop_bounds, box_masks = tf.while_loop(
cond=lambda x, *_: tf.logical_not(x),
body=proposal,
      loop_vars=[tf.zeros((), tf.bool), tf.zeros((4,), tf.float32),
                 tf.zeros((num_boxes,), tf.bool)],
)
filtered_boxes = tf.boolean_mask(boxes, box_masks, axis=0)
mlperf.logger.log(key=mlperf.tags.NUM_CROPPING_ITERATIONS,
value=ssd_constants.NUM_CROP_PASSES)
# Clip boxes to the cropped region.
filtered_boxes = tf.stack([
tf.maximum(filtered_boxes[:, 0], crop_bounds[0]),
tf.maximum(filtered_boxes[:, 1], crop_bounds[1]),
tf.minimum(filtered_boxes[:, 2], crop_bounds[2]),
tf.minimum(filtered_boxes[:, 3], crop_bounds[3]),
], axis=1)
left = crop_bounds[0]
top = crop_bounds[1]
width = crop_bounds[2] - left
height = crop_bounds[3] - top
cropped_boxes = tf.stack([
(filtered_boxes[:, 0] - left) / width,
(filtered_boxes[:, 1] - top) / height,
(filtered_boxes[:, 2] - left) / width,
(filtered_boxes[:, 3] - top) / height,
], axis=1)
  # crop_window contains integer pixel coordinates of the cropped region. A
  # normalized coordinate value y should be mapped to the image coordinate at
  # y * (height - 1).
raw_shape = tf.cast(raw_shape, tf.float32)
crop_window = tf.stack([left * (raw_shape[0] - 1),
top * (raw_shape[1] - 1),
width * raw_shape[0],
height * raw_shape[1]])
crop_window = tf.cast(crop_window, tf.int32)
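  # Note: tf.image.decode_and_crop_jpeg expects crop_window in the order
  # [crop_y, crop_x, crop_height, crop_width], in pixels, which matches the
  # [ymin, xmin, ...] box ordering used above.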
# Fused op only decodes the cropped portion of an image
cropped_image = tf.image.decode_and_crop_jpeg(
image_buffer, crop_window, channels=3)
# Resize converts image dtype from uint8 to float32, without rescaling values.
resized_image = tf.image.resize_images(
cropped_image, [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE])
mlperf.logger.log(key=mlperf.tags.INPUT_SIZE,
value=ssd_constants.IMAGE_SIZE)
cropped_classes = tf.boolean_mask(classes, box_masks, axis=0)
return resized_image, cropped_boxes, cropped_classes
def color_jitter(image, brightness=0, contrast=0, saturation=0, hue=0):
"""Distort the color of the image."""
with tf.name_scope('distort_color'):
if brightness > 0:
image = tf.image.random_brightness(image, max_delta=brightness)
if contrast > 0:
image = tf.image.random_contrast(
image, lower=1-contrast, upper=1+contrast)
if saturation > 0:
image = tf.image.random_saturation(
image, lower=1-saturation, upper=1+saturation)
if hue > 0:
image = tf.image.random_hue(image, max_delta=hue)
return image
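# Usage sketch (illustrative; the distortion strengths are assumptions, not
# values prescribed by this file):
#
#   image = color_jitter(image / 255.0, brightness=0.125, contrast=0.5,
#                        saturation=0.5, hue=0.05)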
def normalize_image(images):
"""Normalize image to zero mean and unit variance.
Args:
images: a tensor representing images, at least 3-D.
Returns:
images normalized by mean and stdev.
"""
data_type = images.dtype
mean = tf.constant(ssd_constants.NORMALIZATION_MEAN, data_type)
std = tf.constant(ssd_constants.NORMALIZATION_STD, data_type)
images = tf.divide(tf.subtract(images, mean), std)
mlperf.logger.log(key=mlperf.tags.DATA_NORMALIZATION_MEAN,
value=ssd_constants.NORMALIZATION_MEAN)
mlperf.logger.log(key=mlperf.tags.DATA_NORMALIZATION_STD,
value=ssd_constants.NORMALIZATION_STD)
return images
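# Worked example (illustrative): a red-channel value of 0.5 on a 0-1 scale
# normalizes to (0.5 - 0.485) / 0.229 ~= 0.066.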
class Encoder(object):
"""Encoder for SSD boxes and labels."""
def __init__(self):
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(
matched_threshold=ssd_constants.MATCH_THRESHOLD,
unmatched_threshold=ssd_constants.MATCH_THRESHOLD,
negatives_lower_than_unmatched=True,
force_match_for_each_row=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=ssd_constants.BOX_CODER_SCALES)
self.default_boxes = DefaultBoxes()('ltrb')
self.default_boxes = box_list.BoxList(
tf.convert_to_tensor(self.default_boxes))
self.assigner = target_assigner.TargetAssigner(
similarity_calc, matcher, box_coder)
def encode_labels(self, gt_boxes, gt_labels):
target_boxes = box_list.BoxList(gt_boxes)
encoded_classes, _, encoded_boxes, _, matches = self.assigner.assign(
self.default_boxes, target_boxes, gt_labels)
num_matched_boxes = tf.reduce_sum(
tf.cast(tf.not_equal(matches, -1), tf.float32))
return encoded_classes, encoded_boxes, num_matched_boxes
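# Minimal usage sketch (illustrative; gt_boxes and gt_labels are placeholder
# tensors, not part of the original file):
#
#   encoder = Encoder()
#   gt_boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]], tf.float32)  # [N, 4] boxes
#   gt_labels = tf.constant([[1]], tf.float32)                  # [N, 1] labels
#   classes, boxes, num_matched = encoder.encode_labels(gt_boxes, gt_labels)
#   # classes/boxes hold per-default-box targets; num_matched counts matches.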