Unverified Commit 97760186 authored by Jonathan Huang, committed by GitHub

Merge pull request #4460 from pkulzc/master

Release evaluation code for OI Challenge 2018 and minor fixes. 
parents ed901b73 a703fc0c
object_detection/builders/dataset_builder.py

@@ -56,15 +56,26 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
   else:
     height, width = spatial_image_shape  # pylint: disable=unpacking-non-sequence
+  num_additional_channels = 0
+  if fields.InputDataFields.image_additional_channels in dataset.output_shapes:
+    num_additional_channels = dataset.output_shapes[
+        fields.InputDataFields.image_additional_channels].dims[2].value
   padding_shapes = {
-      fields.InputDataFields.image: [height, width, 3],
+      # Additional channels are merged before batching.
+      fields.InputDataFields.image: [
+          height, width, 3 + num_additional_channels
+      ],
+      fields.InputDataFields.image_additional_channels: [
+          height, width, num_additional_channels
+      ],
       fields.InputDataFields.source_id: [],
       fields.InputDataFields.filename: [],
       fields.InputDataFields.key: [],
       fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
       fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
-      fields.InputDataFields.groundtruth_instance_masks: [max_num_boxes, height,
-                                                          width],
+      fields.InputDataFields.groundtruth_instance_masks: [
+          max_num_boxes, height, width
+      ],
       fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
       fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
       fields.InputDataFields.groundtruth_area: [max_num_boxes],
@@ -74,7 +85,8 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
       fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
       fields.InputDataFields.true_image_shape: [3],
       fields.InputDataFields.multiclass_scores: [
-          max_num_boxes, num_classes + 1 if num_classes is not None else None],
+          max_num_boxes, num_classes + 1 if num_classes is not None else None
+      ],
   }
   # Determine whether groundtruth_classes are integers or one-hot encodings, and
   # apply batching appropriately.
@@ -90,7 +102,9 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
                      'rank 2 tensor (one-hot encodings)')
   if fields.InputDataFields.original_image in dataset.output_shapes:
-    padding_shapes[fields.InputDataFields.original_image] = [None, None, 3]
+    padding_shapes[fields.InputDataFields.original_image] = [
+        None, None, 3 + num_additional_channels
+    ]
   if fields.InputDataFields.groundtruth_keypoints in dataset.output_shapes:
     tensor_shape = dataset.output_shapes[fields.InputDataFields.
                                          groundtruth_keypoints]
@@ -108,9 +122,13 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
           for tensor_key, _ in dataset.output_shapes.items()}

-def build(input_reader_config, transform_input_data_fn=None,
-          batch_size=None, max_num_boxes=None, num_classes=None,
-          spatial_image_shape=None):
+def build(input_reader_config,
+          transform_input_data_fn=None,
+          batch_size=None,
+          max_num_boxes=None,
+          num_classes=None,
+          spatial_image_shape=None,
+          num_additional_channels=0):
   """Builds a tf.data.Dataset.

   Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all
@@ -128,6 +146,7 @@ def build(input_reader_config, transform_input_data_fn=None,
     spatial_image_shape: A list of two integers of the form [height, width]
       containing expected spatial shape of the image after applying
       transform_input_data_fn. If None, will use dynamic shapes.
+    num_additional_channels: Number of additional channels to use in the input.

   Returns:
     A tf.data.Dataset based on the input_reader_config.
@@ -152,7 +171,9 @@ def build(input_reader_config, transform_input_data_fn=None,
   decoder = tf_example_decoder.TfExampleDecoder(
       load_instance_masks=input_reader_config.load_instance_masks,
       instance_mask_type=input_reader_config.mask_type,
-      label_map_proto_file=label_map_proto_file)
+      label_map_proto_file=label_map_proto_file,
+      use_display_name=input_reader_config.use_display_name,
+      num_additional_channels=num_additional_channels)

   def process_fn(value):
     processed = decoder.decode(value)
...
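For orientation before the corresponding test below, here is a minimal sketch of how the new num_additional_channels argument is consumed. The TFRecord path is a placeholder, and the examples are assumed to carry two encoded channels under 'image/additional_channels/encoded':

import tensorflow as tf
from google.protobuf import text_format

from object_detection.builders import dataset_builder
from object_detection.protos import input_reader_pb2
from object_detection.utils import dataset_util

input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge("""
  shuffle: false
  num_readers: 1
  tf_record_input_reader { input_path: '/tmp/examples.tfrecord' }
""", input_reader_proto)

# num_additional_channels must match how many encoded channel strings each
# tf.Example stores under 'image/additional_channels/encoded'.
tensor_dict = dataset_util.make_initializable_iterator(
    dataset_builder.build(
        input_reader_proto, batch_size=2,
        num_additional_channels=2)).get_next()

# Because the extra channels are merged before batching (see the padding
# shapes above), the image tensor comes out as [batch, height, width, 3 + 2].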
object_detection/builders/dataset_builder_test.py

@@ -30,49 +30,50 @@ from object_detection.utils import dataset_util
 class DatasetBuilderTest(tf.test.TestCase):

-  def create_tf_record(self):
+  def create_tf_record(self, has_additional_channels=False):
     path = os.path.join(self.get_temp_dir(), 'tfrecord')
     writer = tf.python_io.TFRecordWriter(path)

     image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+    additional_channels_tensor = np.random.randint(
+        255, size=(4, 5, 1)).astype(np.uint8)
     flat_mask = (4 * 5) * [1.0]
     with self.test_session():
       encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
+      encoded_additional_channels_jpeg = tf.image.encode_jpeg(
+          tf.constant(additional_channels_tensor)).eval()

+    features = {
+        'image/encoded':
+            feature_pb2.Feature(
+                bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
+        'image/format':
+            feature_pb2.Feature(
+                bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])
+            ),
+        'image/height':
+            feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[4])),
+        'image/width':
+            feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[5])),
+        'image/object/bbox/xmin':
+            feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
+        'image/object/bbox/xmax':
+            feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
+        'image/object/bbox/ymin':
+            feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
+        'image/object/bbox/ymax':
+            feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
+        'image/object/class/label':
+            feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[2])),
+        'image/object/mask':
+            feature_pb2.Feature(
+                float_list=feature_pb2.FloatList(value=flat_mask)),
+    }
+    if has_additional_channels:
+      features['image/additional_channels/encoded'] = feature_pb2.Feature(
+          bytes_list=feature_pb2.BytesList(
+              value=[encoded_additional_channels_jpeg] * 2))
     example = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/encoded':
-                    feature_pb2.Feature(
-                        bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
-                'image/format':
-                    feature_pb2.Feature(
-                        bytes_list=feature_pb2.BytesList(
-                            value=['jpeg'.encode('utf-8')])),
-                'image/height':
-                    feature_pb2.Feature(
-                        int64_list=feature_pb2.Int64List(value=[4])),
-                'image/width':
-                    feature_pb2.Feature(
-                        int64_list=feature_pb2.Int64List(value=[5])),
-                'image/object/bbox/xmin':
-                    feature_pb2.Feature(
-                        float_list=feature_pb2.FloatList(value=[0.0])),
-                'image/object/bbox/xmax':
-                    feature_pb2.Feature(
-                        float_list=feature_pb2.FloatList(value=[1.0])),
-                'image/object/bbox/ymin':
-                    feature_pb2.Feature(
-                        float_list=feature_pb2.FloatList(value=[0.0])),
-                'image/object/bbox/ymax':
-                    feature_pb2.Feature(
-                        float_list=feature_pb2.FloatList(value=[1.0])),
-                'image/object/class/label':
-                    feature_pb2.Feature(
-                        int64_list=feature_pb2.Int64List(value=[2])),
-                'image/object/mask':
-                    feature_pb2.Feature(
-                        float_list=feature_pb2.FloatList(value=flat_mask)),
-            }))
+        features=feature_pb2.Features(feature=features))
     writer.write(example.SerializeToString())
     writer.close()

@@ -218,6 +219,31 @@ class DatasetBuilderTest(tf.test.TestCase):
         [2, 2, 4, 5],
         output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)

+  def test_build_tf_record_input_reader_with_additional_channels(self):
+    tf_record_path = self.create_tf_record(has_additional_channels=True)
+
+    input_reader_text_proto = """
+      shuffle: false
+      num_readers: 1
+      tf_record_input_reader {{
+        input_path: '{0}'
+      }}
+    """.format(tf_record_path)
+    input_reader_proto = input_reader_pb2.InputReader()
+    text_format.Merge(input_reader_text_proto, input_reader_proto)
+    tensor_dict = dataset_util.make_initializable_iterator(
+        dataset_builder.build(
+            input_reader_proto, batch_size=2,
+            num_additional_channels=2)).get_next()
+
+    sv = tf.train.Supervisor(logdir=self.get_temp_dir())
+    with sv.prepare_or_wait_for_session() as sess:
+      sv.start_queue_runners(sess)
+      output_dict = sess.run(tensor_dict)
+      self.assertEquals((2, 4, 5, 5),
+                        output_dict[fields.InputDataFields.image].shape)
+
   def test_raises_error_with_no_input_paths(self):
     input_reader_text_proto = """
       shuffle: false
...
object_detection/builders/image_resizer_builder.py

@@ -79,12 +79,17 @@ def build(image_resizer_config):
         keep_aspect_ratio_config.max_dimension):
       raise ValueError('min_dimension > max_dimension')
     method = _tf_resize_method(keep_aspect_ratio_config.resize_method)
+    per_channel_pad_value = (0, 0, 0)
+    if keep_aspect_ratio_config.per_channel_pad_value:
+      per_channel_pad_value = tuple(keep_aspect_ratio_config.
+                                    per_channel_pad_value)
     image_resizer_fn = functools.partial(
         preprocessor.resize_to_range,
         min_dimension=keep_aspect_ratio_config.min_dimension,
         max_dimension=keep_aspect_ratio_config.max_dimension,
         method=method,
-        pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension)
+        pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension,
+        per_channel_pad_value=per_channel_pad_value)
     if not keep_aspect_ratio_config.convert_to_grayscale:
       return image_resizer_fn
   elif image_resizer_oneof == 'fixed_shape_resizer':
...
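As a usage sketch of the new repeated field (the pad values and dimensions are illustrative; the proto and module names follow this repository's layout), omitting per_channel_pad_value keeps the zero-padding default:

from google.protobuf import text_format

from object_detection.builders import image_resizer_builder
from object_detection.protos import image_resizer_pb2

config = image_resizer_pb2.ImageResizer()
text_format.Merge("""
  keep_aspect_ratio_resizer {
    min_dimension: 600
    max_dimension: 1024
    pad_to_max_dimension: true
    per_channel_pad_value: 123
    per_channel_pad_value: 116
    per_channel_pad_value: 103
  }
""", config)

# The built resizer now pads with (123, 116, 103) per channel instead of zeros.
image_resizer_fn = image_resizer_builder.build(config)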
object_detection/builders/image_resizer_builder_test.py

@@ -52,6 +52,9 @@ class ImageResizerBuilderTest(tf.test.TestCase):
       min_dimension: 10
       max_dimension: 20
       pad_to_max_dimension: true
+      per_channel_pad_value: 3
+      per_channel_pad_value: 4
+      per_channel_pad_value: 5
     }
     """
     input_shape = (50, 25, 3)
...
object_detection/core/box_list_ops.py

@@ -778,7 +778,7 @@ def to_absolute_coordinates(boxlist,
                             height,
                             width,
                             check_range=True,
-                            maximum_normalized_coordinate=1.01,
+                            maximum_normalized_coordinate=1.1,
                             scope=None):
   """Converts normalized box coordinates to absolute pixel coordinates.
@@ -792,7 +792,7 @@ def to_absolute_coordinates(boxlist,
     width: Maximum value for width of absolute box coordinates.
     check_range: If True, checks if the coordinates are normalized or not.
     maximum_normalized_coordinate: Maximum coordinate value to be considered
-      as normalized, default to 1.01.
+      as normalized, default to 1.1.
     scope: name scope.

   Returns:
...
object_detection/core/box_list_ops_test.py

@@ -931,6 +931,21 @@ class CoordinatesConversionTest(tf.test.TestCase):
       out = sess.run(boxlist.get())
       self.assertAllClose(out, coordinates)

+  def test_to_absolute_coordinates_maximum_coordinate_check(self):
+    coordinates = tf.constant([[0, 0, 1.2, 1.2],
+                               [0.25, 0.25, 0.75, 0.75]], tf.float32)
+    img = tf.ones((128, 100, 100, 3))
+    boxlist = box_list.BoxList(coordinates)
+    absolute_boxlist = box_list_ops.to_absolute_coordinates(
+        boxlist,
+        tf.shape(img)[1],
+        tf.shape(img)[2],
+        maximum_normalized_coordinate=1.1)
+    with self.test_session() as sess:
+      with self.assertRaisesOpError('assertion failed'):
+        sess.run(absolute_boxlist.get())
+

 class BoxRefinementTest(tf.test.TestCase):
...
object_detection/core/box_predictor.py

@@ -79,10 +79,12 @@ class BoxPredictor(object):
     Returns:
       A dictionary containing at least the following tensors.
-        box_encodings: A list of float tensors of shape
-          [batch_size, num_anchors_i, q, code_size] representing the location of
-          the objects, where q is 1 or the number of classes. Each entry in the
-          list corresponds to a feature map in the input `image_features` list.
+        box_encodings: A list of float tensors. Each entry in the list
+          corresponds to a feature map in the input `image_features` list. All
+          tensors in the list have one of the two following shapes:
+          a. [batch_size, num_anchors_i, q, code_size] representing the location
+            of the objects, where q is 1 or the number of classes.
+          b. [batch_size, num_anchors_i, code_size].
         class_predictions_with_background: A list of float tensors of shape
           [batch_size, num_anchors_i, num_classes + 1] representing the class
           predictions for the proposals. Each entry in the list corresponds to a
@@ -120,10 +122,12 @@ class BoxPredictor(object):
     Returns:
       A dictionary containing at least the following tensors.
-        box_encodings: A list of float tensors of shape
-          [batch_size, num_anchors_i, q, code_size] representing the location of
-          the objects, where q is 1 or the number of classes. Each entry in the
-          list corresponds to a feature map in the input `image_features` list.
+        box_encodings: A list of float tensors. Each entry in the list
+          corresponds to a feature map in the input `image_features` list. All
+          tensors in the list have one of the two following shapes:
+          a. [batch_size, num_anchors_i, q, code_size] representing the location
+            of the objects, where q is 1 or the number of classes.
+          b. [batch_size, num_anchors_i, code_size].
         class_predictions_with_background: A list of float tensors of shape
           [batch_size, num_anchors_i, num_classes + 1] representing the class
           predictions for the proposals. Each entry in the list corresponds to a
@@ -765,6 +769,13 @@ class ConvolutionalBoxPredictor(BoxPredictor):
     }

+# TODO(rathodv): Replace with slim.arg_scope_func_key once its available
+# externally.
+def _arg_scope_func_key(op):
+  """Returns a key that can be used to index arg_scope dictionary."""
+  return getattr(op, '_key_op', str(op))
+

 # TODO(rathodv): Merge the implementation with ConvolutionalBoxPredictor above
 # since they are very similar.
 class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
@@ -773,8 +784,12 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
   Defines the box predictor as defined in
   https://arxiv.org/abs/1708.02002. This class differs from
   ConvolutionalBoxPredictor in that it shares weights and biases while
-  predicting from different feature maps. Separate multi-layer towers are
-  constructed for the box encoding and class predictors respectively.
+  predicting from different feature maps. However, batch_norm parameters are not
+  shared because the statistics of the activations vary among the different
+  feature maps.
+
+  Also note that separate multi-layer towers are constructed for the box
+  encoding and class predictors respectively.
   """

   def __init__(self,
@@ -833,14 +848,15 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
     Returns:
       box_encodings: A list of float tensors of shape
-        [batch_size, num_anchors_i, q, code_size] representing the location of
-        the objects, where q is 1 or the number of classes. Each entry in the
-        list corresponds to a feature map in the input `image_features` list.
+        [batch_size, num_anchors_i, code_size] representing the location of
+        the objects. Each entry in the list corresponds to a feature map in the
+        input `image_features` list.
       class_predictions_with_background: A list of float tensors of shape
         [batch_size, num_anchors_i, num_classes + 1] representing the class
         predictions for the proposals. Each entry in the list corresponds to a
         feature map in the input `image_features` list.

     Raises:
       ValueError: If the image feature maps do not have the same number of
         channels or if the num predictions per locations is differs between the
@@ -858,15 +874,18 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
                        'channels, found: {}'.format(feature_channels))
     box_encodings_list = []
     class_predictions_list = []
-    for (image_feature, num_predictions_per_location) in zip(
-        image_features, num_predictions_per_location_list):
+    for feature_index, (image_feature,
+                        num_predictions_per_location) in enumerate(
+                            zip(image_features,
+                                num_predictions_per_location_list)):
       # Add a slot for the background class.
       with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
                              reuse=tf.AUTO_REUSE):
         num_class_slots = self.num_classes + 1
         box_encodings_net = image_feature
         class_predictions_net = image_feature
-        with slim.arg_scope(self._conv_hyperparams_fn()):
+        with slim.arg_scope(self._conv_hyperparams_fn()) as sc:
+          apply_batch_norm = _arg_scope_func_key(slim.batch_norm) in sc
           for i in range(self._num_layers_before_predictor):
             box_encodings_net = slim.conv2d(
                 box_encodings_net,
@@ -874,14 +893,22 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
                 [self._kernel_size, self._kernel_size],
                 stride=1,
                 padding='SAME',
-                scope='BoxEncodingPredictionTower/conv2d_{}'.format(i))
+                activation_fn=None,
+                normalizer_fn=(tf.identity if apply_batch_norm else None),
+                scope='BoxPredictionTower/conv2d_{}'.format(i))
+            if apply_batch_norm:
+              box_encodings_net = slim.batch_norm(
+                  box_encodings_net,
+                  scope='BoxPredictionTower/conv2d_{}/BatchNorm/feature_{}'.
+                  format(i, feature_index))
+            box_encodings_net = tf.nn.relu6(box_encodings_net)
           box_encodings = slim.conv2d(
               box_encodings_net,
               num_predictions_per_location * self._box_code_size,
               [self._kernel_size, self._kernel_size],
               activation_fn=None, stride=1, padding='SAME',
               normalizer_fn=None,
-              scope='BoxEncodingPredictor')
+              scope='BoxPredictor')
           for i in range(self._num_layers_before_predictor):
             class_predictions_net = slim.conv2d(
@@ -890,7 +917,15 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
                 [self._kernel_size, self._kernel_size],
                 stride=1,
                 padding='SAME',
+                activation_fn=None,
+                normalizer_fn=(tf.identity if apply_batch_norm else None),
                 scope='ClassPredictionTower/conv2d_{}'.format(i))
+            if apply_batch_norm:
+              class_predictions_net = slim.batch_norm(
+                  class_predictions_net,
+                  scope='ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}'
+                  .format(i, feature_index))
+            class_predictions_net = tf.nn.relu6(class_predictions_net)
           if self._use_dropout:
             class_predictions_net = slim.dropout(
                 class_predictions_net, keep_prob=self._dropout_keep_prob)
@@ -912,7 +947,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
                 combined_feature_map_shape[1] *
                 combined_feature_map_shape[2] *
                 num_predictions_per_location,
-                1, self._box_code_size]))
+                self._box_code_size]))
         box_encodings_list.append(box_encodings)
         class_predictions_with_background = tf.reshape(
             class_predictions_with_background,
...
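Since the sharing scheme is easy to miss in the diff, here is a distilled, self-contained sketch of the pattern (toy shapes and scope names, not the predictor's real ones): convolution weights are reused across feature maps through tf.AUTO_REUSE, while each feature map gets a distinct BatchNorm scope so its normalization statistics stay separate.

import tensorflow as tf

slim = tf.contrib.slim

feature_maps = [tf.random_normal([4, 32, 32, 64]),
                tf.random_normal([4, 16, 16, 64])]

outputs = []
for feature_index, feature in enumerate(feature_maps):
  with tf.variable_scope('SharedTower', reuse=tf.AUTO_REUSE):
    # The conv weights are created on the first iteration and reused after.
    net = slim.conv2d(
        feature, 64, [3, 3],
        activation_fn=None,
        normalizer_fn=tf.identity,  # normalization is deferred, as in the PR
        scope='conv2d_0')
    # A distinct scope per feature map creates separate beta/moving stats.
    net = slim.batch_norm(
        net, scope='conv2d_0/BatchNorm/feature_{}'.format(feature_index))
    outputs.append(tf.nn.relu6(net))

# Resulting trainable variables: a single SharedTower/conv2d_0/weights, plus
# SharedTower/conv2d_0/BatchNorm/feature_0/beta and .../feature_1/beta.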
object_detection/core/box_predictor_test.py

@@ -442,6 +442,24 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
     text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
     return hyperparams_builder.build(conv_hyperparams, is_training=True)

+  def _build_conv_arg_scope_no_batch_norm(self):
+    conv_hyperparams = hyperparams_pb2.Hyperparams()
+    conv_hyperparams_text_proto = """
+      activation: RELU_6
+      regularizer {
+        l2_regularizer {
+        }
+      }
+      initializer {
+        random_normal_initializer {
+          stddev: 0.01
+          mean: 0.0
+        }
+      }
+    """
+    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
+    return hyperparams_builder.build(conv_hyperparams, is_training=True)
+
   def test_get_boxes_for_five_aspect_ratios_per_location(self):

     def graph_fn(image_features):
@@ -463,7 +481,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
     image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
     (box_encodings, objectness_predictions) = self.execute(
         graph_fn, [image_features])
-    self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
+    self.assertAllEqual(box_encodings.shape, [4, 320, 4])
     self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])

   def test_bias_predictions_to_background_with_sigmoid_score_conversion(self):
@@ -512,7 +530,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
     image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
     (box_encodings, class_predictions_with_background) = self.execute(
         graph_fn, [image_features])
-    self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
+    self.assertAllEqual(box_encodings.shape, [4, 320, 4])
     self.assertAllEqual(class_predictions_with_background.shape,
                         [4, 320, num_classes_without_background+1])
@@ -543,11 +561,12 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
     image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32)
     (box_encodings, class_predictions_with_background) = self.execute(
         graph_fn, [image_features1, image_features2])
-    self.assertAllEqual(box_encodings.shape, [4, 640, 1, 4])
+    self.assertAllEqual(box_encodings.shape, [4, 640, 4])
     self.assertAllEqual(class_predictions_with_background.shape,
                         [4, 640, num_classes_without_background+1])

-  def test_predictions_from_multiple_feature_maps_share_weights(self):
+  def test_predictions_from_multiple_feature_maps_share_weights_not_batchnorm(
+      self):
     num_classes_without_background = 6
     def graph_fn(image_features1, image_features2):
       conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
@@ -574,26 +593,95 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
       actual_variable_set = set(
           [var.op.name for var in tf.trainable_variables()])
       expected_variable_set = set([
+          # Box prediction tower
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'BoxPredictionTower/conv2d_0/weights'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
           ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-           'BoxEncodingPredictionTower/conv2d_0/weights'),
+           'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
           ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-           'BoxEncodingPredictionTower/conv2d_0/BatchNorm/beta'),
+           'BoxPredictionTower/conv2d_1/weights'),
          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-           'BoxEncodingPredictionTower/conv2d_1/weights'),
+           'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
           ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-           'BoxEncodingPredictionTower/conv2d_1/BatchNorm/beta'),
+           'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
+          # Box prediction head
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'BoxPredictor/weights'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'BoxPredictor/biases'),
+          # Class prediction tower
           ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
            'ClassPredictionTower/conv2d_0/weights'),
           ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-           'ClassPredictionTower/conv2d_0/BatchNorm/beta'),
+           'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
           ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
            'ClassPredictionTower/conv2d_1/weights'),
           ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-           'ClassPredictionTower/conv2d_1/BatchNorm/beta'),
+           'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
+          # Class prediction head
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'ClassPredictor/weights'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'ClassPredictor/biases')])
+      self.assertEqual(expected_variable_set, actual_variable_set)
+
+  def test_no_batchnorm_params_when_batchnorm_is_not_configured(self):
+    num_classes_without_background = 6
+    def graph_fn(image_features1, image_features2):
+      conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
+          is_training=False,
+          num_classes=num_classes_without_background,
+          conv_hyperparams_fn=self._build_conv_arg_scope_no_batch_norm(),
+          depth=32,
+          num_layers_before_predictor=2,
+          box_code_size=4)
+      box_predictions = conv_box_predictor.predict(
+          [image_features1, image_features2],
+          num_predictions_per_location=[5, 5],
+          scope='BoxPredictor')
+      box_encodings = tf.concat(
+          box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
+      class_predictions_with_background = tf.concat(
+          box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
+          axis=1)
+      return (box_encodings, class_predictions_with_background)
+
+    with self.test_session(graph=tf.Graph()):
+      graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
+               tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
+      actual_variable_set = set(
+          [var.op.name for var in tf.trainable_variables()])
+      expected_variable_set = set([
+          # Box prediction tower
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'BoxPredictionTower/conv2d_0/weights'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'BoxPredictionTower/conv2d_0/biases'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'BoxPredictionTower/conv2d_1/weights'),
           ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-           'BoxEncodingPredictor/weights'),
+           'BoxPredictionTower/conv2d_1/biases'),
+          # Box prediction head
           ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
-           'BoxEncodingPredictor/biases'),
+           'BoxPredictor/weights'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'BoxPredictor/biases'),
+          # Class prediction tower
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'ClassPredictionTower/conv2d_0/weights'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'ClassPredictionTower/conv2d_0/biases'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'ClassPredictionTower/conv2d_1/weights'),
+          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
+           'ClassPredictionTower/conv2d_1/biases'),
+          # Class prediction head
          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
           'ClassPredictor/weights'),
          ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
@@ -628,7 +716,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
           [tf.shape(box_encodings), tf.shape(objectness_predictions)],
           feed_dict={image_features:
                      np.random.rand(4, resolution, resolution, 64)})
-      self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
+      self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4])
       self.assertAllEqual(objectness_predictions_shape,
                           [4, expected_num_anchors, 1])
...
object_detection/core/preprocessor.py

@@ -2128,7 +2128,8 @@ def resize_to_range(image,
                     max_dimension=None,
                     method=tf.image.ResizeMethod.BILINEAR,
                     align_corners=False,
-                    pad_to_max_dimension=False):
+                    pad_to_max_dimension=False,
+                    per_channel_pad_value=(0, 0, 0)):
   """Resizes an image so its dimensions are within the provided value.

   The output size can be described by two cases:
@@ -2153,6 +2154,8 @@ def resize_to_range(image,
       so the resulting image is of the spatial size
       [max_dimension, max_dimension]. If masks are included they are padded
       similarly.
+    per_channel_pad_value: A tuple of per-channel scalar value to use for
+      padding. By default pads zeros.

   Returns:
     Note that the position of the resized_image_shape changes based on whether
@@ -2181,8 +2184,20 @@ def resize_to_range(image,
         image, new_size[:-1], method=method, align_corners=align_corners)

     if pad_to_max_dimension:
-      new_image = tf.image.pad_to_bounding_box(
-          new_image, 0, 0, max_dimension, max_dimension)
+      channels = tf.unstack(new_image, axis=2)
+      if len(channels) != len(per_channel_pad_value):
+        raise ValueError('Number of channels must be equal to the length of '
+                         'per-channel pad value.')
+      new_image = tf.stack(
+          [
+              tf.pad(
+                  channels[i], [[0, max_dimension - new_size[0]],
+                                [0, max_dimension - new_size[1]]],
+                  constant_values=per_channel_pad_value[i])
+              for i in range(len(channels))
+          ],
+          axis=2)
+      new_image.set_shape([max_dimension, max_dimension, 3])

     result = [new_image]
     if masks is not None:
...
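The motivation for the unstack/pad/stack dance above is that tf.image.pad_to_bounding_box can only pad with zeros. A standalone sketch of the same trick (the helper name and toy values are illustrative, not part of the PR):

import tensorflow as tf

def pad_per_channel(image, target_height, target_width,
                    per_channel_pad_value=(0, 0, 0)):
  """Pads a [H, W, C] image to [target_height, target_width, C]."""
  height, width = tf.shape(image)[0], tf.shape(image)[1]
  channels = tf.unstack(image, axis=2)  # needs a static channel count
  if len(channels) != len(per_channel_pad_value):
    raise ValueError('Number of channels must be equal to the length of '
                     'per-channel pad value.')
  # Pad each 2-D channel with its own constant, then restack channel-wise.
  return tf.stack(
      [tf.pad(channels[i],
              [[0, target_height - height], [0, target_width - width]],
              constant_values=per_channel_pad_value[i])
       for i in range(len(channels))],
      axis=2)

image = tf.ones([1, 1, 3])
padded = pad_per_channel(image, 2, 2, (123.68, 116.779, 103.939))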
object_detection/core/preprocessor_test.py

@@ -2316,6 +2316,46 @@ class PreprocessorTest(tf.test.TestCase):
                                          np.random.randn(*in_shape)})
         self.assertAllEqual(out_image_shape, expected_shape)

+  def testResizeToRangeWithPadToMaxDimensionReturnsCorrectShapes(self):
+    in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+    min_dim = 50
+    max_dim = 100
+    expected_shape_list = [[100, 100, 3], [100, 100, 3], [100, 100, 3]]
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+      out_image, _ = preprocessor.resize_to_range(
+          in_image,
+          min_dimension=min_dim,
+          max_dimension=max_dim,
+          pad_to_max_dimension=True)
+      self.assertAllEqual(out_image.shape.as_list(), expected_shape)
+      out_image_shape = tf.shape(out_image)
+      with self.test_session() as sess:
+        out_image_shape = sess.run(
+            out_image_shape, feed_dict={in_image: np.random.randn(*in_shape)})
+        self.assertAllEqual(out_image_shape, expected_shape)
+
+  def testResizeToRangeWithPadToMaxDimensionReturnsCorrectTensor(self):
+    in_image_np = np.array([[[0, 1, 2]]], np.float32)
+    ex_image_np = np.array(
+        [[[0, 1, 2], [123.68, 116.779, 103.939]],
+         [[123.68, 116.779, 103.939], [123.68, 116.779, 103.939]]], np.float32)
+    min_dim = 1
+    max_dim = 2
+
+    in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+    out_image, _ = preprocessor.resize_to_range(
+        in_image,
+        min_dimension=min_dim,
+        max_dimension=max_dim,
+        pad_to_max_dimension=True,
+        per_channel_pad_value=(123.68, 116.779, 103.939))
+    with self.test_session() as sess:
+      out_image_np = sess.run(out_image, feed_dict={in_image: in_image_np})
+      self.assertAllClose(ex_image_np, out_image_np)
+
   def testResizeToRangeWithMasksPreservesStaticSpatialShape(self):
     """Tests image resizing, checking output sizes."""
     in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
...
object_detection/core/standard_fields.py

@@ -34,6 +34,7 @@ class InputDataFields(object):
   Attributes:
     image: image.
+    image_additional_channels: additional channels.
     original_image: image in the original input size.
     key: unique key corresponding to image.
     source_id: source of the original image.
@@ -66,6 +67,7 @@ class InputDataFields(object):
     multiclass_scores: the label score per class for each box.
   """
   image = 'image'
+  image_additional_channels = 'image_additional_channels'
   original_image = 'original_image'
   key = 'key'
   source_id = 'source_id'
@@ -161,6 +163,8 @@ class TfExampleFields(object):
     height: height of image in pixels, e.g. 462
     width: width of image in pixels, e.g. 581
     source_id: original source of the image
+    image_class_text: image-level label in text format
+    image_class_label: image-level label in numerical format
     object_class_text: labels in text format, e.g. ["person", "cat"]
     object_class_label: labels in numbers, e.g. [16, 8]
     object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
@@ -195,6 +199,8 @@ class TfExampleFields(object):
   height = 'image/height'
   width = 'image/width'
   source_id = 'image/source_id'
+  image_class_text = 'image/class/text'
+  image_class_label = 'image/class/label'
   object_class_text = 'image/object/class/text'
   object_class_label = 'image/object/class/label'
   object_bbox_ymin = 'image/object/bbox/ymin'
...
object_detection/data_decoders/tf_example_decoder.py

@@ -112,7 +112,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
                label_map_proto_file=None,
                use_display_name=False,
                dct_method='',
-               num_keypoints=0):
+               num_keypoints=0,
+               num_additional_channels=0):
     """Constructor sets keys_to_features and items_to_handlers.

     Args:
@@ -133,6 +134,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
         example, the jpeg library does not have that specific option.
       num_keypoints: the number of keypoints per object.
+      num_additional_channels: how many additional channels to use.

     Raises:
       ValueError: If `instance_mask_type` option is not one of
@@ -178,15 +180,28 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         'image/object/weight':
             tf.VarLenFeature(tf.float32),
     }
+    # We are checking `dct_method` instead of passing it directly in order to
+    # ensure TF version 1.6 compatibility.
     if dct_method:
       image = slim_example_decoder.Image(
           image_key='image/encoded',
           format_key='image/format',
           channels=3,
           dct_method=dct_method)
+      additional_channel_image = slim_example_decoder.Image(
+          image_key='image/additional_channels/encoded',
+          format_key='image/format',
+          channels=1,
+          repeated=True,
+          dct_method=dct_method)
     else:
       image = slim_example_decoder.Image(
           image_key='image/encoded', format_key='image/format', channels=3)
+      additional_channel_image = slim_example_decoder.Image(
+          image_key='image/additional_channels/encoded',
+          format_key='image/format',
+          channels=1,
+          repeated=True)
     self.items_to_handlers = {
         fields.InputDataFields.image:
             image,
@@ -211,6 +226,13 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         fields.InputDataFields.groundtruth_weights: (
             slim_example_decoder.Tensor('image/object/weight')),
     }
+    if num_additional_channels > 0:
+      self.keys_to_features[
+          'image/additional_channels/encoded'] = tf.FixedLenFeature(
+              (num_additional_channels,), tf.string)
+      self.items_to_handlers[
+          fields.InputDataFields.
+          image_additional_channels] = additional_channel_image
     self._num_keypoints = num_keypoints
     if num_keypoints > 0:
       self.keys_to_features['image/object/keypoint/x'] = (
@@ -294,6 +316,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         [None] indicating if the boxes enclose a crowd.

     Optional:
+      fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
+        shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
+        is width; 3rd dim is the number of additional channels.
       fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
         [None] indicating if the boxes represent `difficult` instances.
       fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
@@ -316,6 +341,12 @@ class TfExampleDecoder(data_decoder.DataDecoder):
     tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
         tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

+    if fields.InputDataFields.image_additional_channels in tensor_dict:
+      channels = tensor_dict[fields.InputDataFields.image_additional_channels]
+      channels = tf.squeeze(channels, axis=3)
+      channels = tf.transpose(channels, perm=[1, 2, 0])
+      tensor_dict[fields.InputDataFields.image_additional_channels] = channels
+
     def default_groundtruth_weights():
       return tf.ones(
           [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
...
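A short shape walkthrough of the squeeze/transpose step above: the repeated Image handler decodes each of the N encoded strings to [height, width, 1], stacked as [N, height, width, 1]; the squeeze and transpose then produce the [height, width, N] layout documented in the decode() docstring. Toy sizes below:

import tensorflow as tf

num_additional_channels, height, width = 2, 4, 5
decoded = tf.zeros([num_additional_channels, height, width, 1], tf.uint8)

channels = tf.squeeze(decoded, axis=3)             # [N, H, W]
channels = tf.transpose(channels, perm=[1, 2, 0])  # [H, W, N]

assert channels.shape.as_list() == [height, width, num_additional_channels]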
object_detection/data_decoders/tf_example_decoder_test.py

@@ -23,6 +23,7 @@ from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import parsing_ops
@@ -72,10 +73,41 @@ class TfExampleDecoderTest(tf.test.TestCase):
   def _BytesFeatureFromList(self, ndarray):
     values = ndarray.flatten().tolist()
+    for i in range(len(values)):
+      values[i] = values[i].encode('utf-8')
     return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=values))

+  def testDecodeAdditionalChannels(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    additional_channel_tensor = np.random.randint(
+        256, size=(4, 5, 1)).astype(np.uint8)
+    encoded_additional_channel = self._EncodeImage(additional_channel_tensor)
+    decoded_additional_channel = self._DecodeImage(encoded_additional_channel)
+
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    self._BytesFeature(encoded_jpeg),
+                'image/additional_channels/encoded':
+                    self._BytesFeatureFromList(
+                        np.array([encoded_additional_channel] * 2)),
+                'image/format':
+                    self._BytesFeature('jpeg'),
+                'image/source_id':
+                    self._BytesFeature('image_id'),
+            })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder(
+        num_additional_channels=2)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+      self.assertAllEqual(
+          np.concatenate([decoded_additional_channel] * 2, axis=2),
+          tensor_dict[fields.InputDataFields.image_additional_channels])
+
   def testDecodeExampleWithBranchedBackupHandler(self):
     example1 = example_pb2.Example(
         features=feature_pb2.Features(
@@ -304,6 +336,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual(
         2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])

+  @test_util.enable_c_shapes
   def testDecodeKeypoint(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
@@ -331,7 +364,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                          get_shape().as_list()), [None, 4])
     self.assertAllEqual((tensor_dict[fields.InputDataFields.
                                      groundtruth_keypoints].
-                         get_shape().as_list()), [None, 3, 2])
+                         get_shape().as_list()), [2, 3, 2])

     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -376,6 +409,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights],
                         np.ones(2, dtype=np.float32))

+  @test_util.enable_c_shapes
   def testDecodeObjectLabel(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
@@ -391,7 +425,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual((tensor_dict[
         fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
-                        [None])
+                        [2])

     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -522,6 +556,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
       self.assertAllEqual([3, 1],
                           tensor_dict[fields.InputDataFields.groundtruth_classes])

+  @test_util.enable_c_shapes
   def testDecodeObjectArea(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
@@ -536,13 +571,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
-                         get_shape().as_list()), [None])
+                         get_shape().as_list()), [2])

     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(object_area,
                           tensor_dict[fields.InputDataFields.groundtruth_area])

+  @test_util.enable_c_shapes
   def testDecodeObjectIsCrowd(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
@@ -558,7 +594,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual((tensor_dict[
         fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
-                        [None])
+                        [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -566,6 +602,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                           tensor_dict[
                               fields.InputDataFields.groundtruth_is_crowd])

+  @test_util.enable_c_shapes
   def testDecodeObjectDifficult(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
@@ -581,7 +618,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual((tensor_dict[
         fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
-                        [None])
+                        [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -589,6 +626,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                           tensor_dict[
                               fields.InputDataFields.groundtruth_difficult])

+  @test_util.enable_c_shapes
   def testDecodeObjectGroupOf(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
@@ -605,7 +643,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual((tensor_dict[
         fields.InputDataFields.groundtruth_group_of].get_shape().as_list()),
-                        [None])
+                        [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -637,6 +675,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
           object_weights,
           tensor_dict[fields.InputDataFields.groundtruth_weights])

+  @test_util.enable_c_shapes
   def testDecodeInstanceSegmentation(self):
     num_instances = 4
     image_height = 5
@@ -673,11 +712,11 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual((
         tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
-        get_shape().as_list()), [None, None, None])
+        get_shape().as_list()), [4, 5, 3])

     self.assertAllEqual((
         tensor_dict[fields.InputDataFields.groundtruth_classes].
-        get_shape().as_list()), [None])
+        get_shape().as_list()), [4])

     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
...
@@ -16,7 +16,8 @@ r"""Creates TFRecords of Open Images dataset for object detection.
Example usage:
  python object_detection/dataset_tools/create_oid_tf_record.py \
-    --input_annotations_csv=/path/to/input/annotations-human-bbox.csv \
+    --input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \
+    --input_image_label_annotations_csv=/path/to/input/annotations-label.csv \
    --input_images_directory=/path/to/input/image_pixels_directory \
    --input_label_map=/path/to/input/labels_bbox_545.labelmap \
    --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord
@@ -27,7 +28,9 @@ https://github.com/openimages/dataset
This script will include every image found in the input_images_directory in the
output TFRecord, even if the image has no corresponding bounding box annotations
-in the input_annotations_csv.
+in the input_box_annotations_csv. If input_image_label_annotations_csv is
+specified, it will add image-level labels as well. Note that whether a label is
+positively or negatively verified is NOT added to the tfrecord.
"""
from __future__ import absolute_import
from __future__ import division
@@ -40,13 +43,16 @@ import pandas as pd
import tensorflow as tf

from object_detection.dataset_tools import oid_tfrecord_creation
+from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import label_map_util

-tf.flags.DEFINE_string('input_annotations_csv', None,
+tf.flags.DEFINE_string('input_box_annotations_csv', None,
                       'Path to CSV containing image bounding box annotations')
tf.flags.DEFINE_string('input_images_directory', None,
                       'Directory containing the image pixels '
                       'downloaded from the OpenImages GitHub repository.')
+tf.flags.DEFINE_string('input_image_label_annotations_csv', None,
+                       'Path to CSV containing image-level label annotations')
tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto')
tf.flags.DEFINE_string(
    'output_tf_record_path_prefix', None,
@@ -61,7 +67,7 @@ def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  required_flags = [
-      'input_annotations_csv', 'input_images_directory', 'input_label_map',
+      'input_box_annotations_csv', 'input_images_directory', 'input_label_map',
      'output_tf_record_path_prefix'
  ]
  for flag_name in required_flags:
@@ -69,17 +75,24 @@ def main(_):
      raise ValueError('Flag --{} is required'.format(flag_name))

  label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
-  all_annotations = pd.read_csv(FLAGS.input_annotations_csv)
+  all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
+  if FLAGS.input_image_label_annotations_csv:
+    all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv)
+    all_label_annotations.rename(
+        columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
+  else:
+    all_label_annotations = None
  all_images = tf.gfile.Glob(
      os.path.join(FLAGS.input_images_directory, '*.jpg'))
  all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
  all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
-  all_annotations = pd.concat([all_annotations, all_image_ids])
+  all_annotations = pd.concat(
+      [all_box_annotations, all_image_ids, all_label_annotations])

  tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

  with contextlib2.ExitStack() as tf_record_close_stack:
-    output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
+    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
        FLAGS.num_shards)
...
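For reference, a minimal sketch of the two input CSV schemas the script above consumes. The column headers follow the row formats asserted in oid_hierarchical_labels_expansion.py further below; the file names are only placeholders:

import pandas as pd

# Box annotations: one row per bounding box.
# ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,
# IsTruncated,IsGroupOf,IsDepiction,IsInside
boxes = pd.read_csv('annotations-human-bbox.csv')

# Image-level labels: one row per verified label.
# ImageID,Source,LabelName,Confidence
labels = pd.read_csv('annotations-label.csv')

# As in main() above, rename 'Confidence' so box rows and label rows can be
# concatenated into one frame without their confidence columns colliding.
labels.rename(columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
all_annotations = pd.concat([boxes, labels])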
@@ -33,11 +33,13 @@ import os
import random
import re

+import contextlib2
from lxml import etree
import numpy as np
import PIL.Image
import tensorflow as tf

+from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
@@ -52,6 +54,8 @@ flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
                     'in the latter case, the resulting files are much larger.')
flags.DEFINE_string('mask_type', 'png', 'How to represent instance '
                    'segmentation masks. Options are "png" or "numerical".')
+flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards')

FLAGS = flags.FLAGS
@@ -208,6 +212,7 @@ def dict_to_tf_example(data,
def create_tf_record(output_filename,
+                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
@@ -218,6 +223,7 @@ def create_tf_record(output_filename,
  Args:
    output_filename: Path to where output file is saved.
+    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
@@ -227,34 +233,36 @@ def create_tf_record(output_filename,
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
-  writer = tf.python_io.TFRecordWriter(output_filename)
-  for idx, example in enumerate(examples):
-    if idx % 100 == 0:
-      logging.info('On image %d of %d', idx, len(examples))
-    xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
-    mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')
-
-    if not os.path.exists(xml_path):
-      logging.warning('Could not find %s, ignoring example.', xml_path)
-      continue
-    with tf.gfile.GFile(xml_path, 'r') as fid:
-      xml_str = fid.read()
-    xml = etree.fromstring(xml_str)
-    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
-
-    try:
-      tf_example = dict_to_tf_example(
-          data,
-          mask_path,
-          label_map_dict,
-          image_dir,
-          faces_only=faces_only,
-          mask_type=mask_type)
-      writer.write(tf_example.SerializeToString())
-    except ValueError:
-      logging.warning('Invalid example: %s, ignoring.', xml_path)
-
-  writer.close()
+  with contextlib2.ExitStack() as tf_record_close_stack:
+    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
+        tf_record_close_stack, output_filename, num_shards)
+    for idx, example in enumerate(examples):
+      if idx % 100 == 0:
+        logging.info('On image %d of %d', idx, len(examples))
+      xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
+      mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')
+
+      if not os.path.exists(xml_path):
+        logging.warning('Could not find %s, ignoring example.', xml_path)
+        continue
+      with tf.gfile.GFile(xml_path, 'r') as fid:
+        xml_str = fid.read()
+      xml = etree.fromstring(xml_str)
+      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
+
+      try:
+        tf_example = dict_to_tf_example(
+            data,
+            mask_path,
+            label_map_dict,
+            image_dir,
+            faces_only=faces_only,
+            mask_type=mask_type)
+        if tf_example:
+          shard_idx = idx % num_shards
+          output_tfrecords[shard_idx].write(tf_example.SerializeToString())
+      except ValueError:
+        logging.warning('Invalid example: %s, ignoring.', xml_path)

# TODO(derekjchow): Add test for pet/PASCAL main files.
@@ -279,15 +287,16 @@ def main(_):
  logging.info('%d training and %d validation examples.',
               len(train_examples), len(val_examples))

-  train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
-  val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
-  if FLAGS.faces_only:
+  train_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_train.record')
+  val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record')
+  if not FLAGS.faces_only:
    train_output_path = os.path.join(FLAGS.output_dir,
-                                     'pet_train_with_masks.record')
+                                     'pets_fullbody_with_masks_train.record')
    val_output_path = os.path.join(FLAGS.output_dir,
-                                   'pet_val_with_masks.record')
+                                   'pets_fullbody_with_masks_val.record')
  create_tf_record(
      train_output_path,
+      FLAGS.num_shards,
      label_map_dict,
      annotations_dir,
      image_dir,
@@ -296,6 +305,7 @@ def main(_):
      mask_type=FLAGS.mask_type)
  create_tf_record(
      val_output_path,
+      FLAGS.num_shards,
      label_map_dict,
      annotations_dir,
      image_dir,
...
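As a usage sketch, the sharded writing pattern introduced above can also be driven directly. This mirrors the create_tf_record loop; serialized_examples stands in for any iterable of serialized tf.train.Example strings, and the output path is a placeholder:

import contextlib2
import tensorflow as tf

from object_detection.dataset_tools import tf_record_creation_util

num_shards = 10
with contextlib2.ExitStack() as tf_record_close_stack:
  # Opens shards named <prefix>-00000-of-00010 ... <prefix>-00009-of-00010;
  # the exit stack closes every writer when the block ends.
  output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
      tf_record_close_stack, '/tmp/pet_faces_train.record', num_shards)
  for idx, serialized_example in enumerate(serialized_examples):
    # Round-robin assignment: example idx lands in shard idx % num_shards.
    output_tfrecords[idx % num_shards].write(serialized_example)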
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A class and executable to expand hierarchically image-level labels and boxes.
Example usage:
./hierarchical_labels_expansion <path to JSON hierarchy> <input csv file>
<output csv file> [optional]labels_file
"""
import json
import sys
def _update_dict(initial_dict, update):
"""Updates dictionary with update content.
Args:
initial_dict: initial dictionary.
update: updated dictionary.
"""
for key, value_list in update.iteritems():
if key in initial_dict:
initial_dict[key].extend(value_list)
else:
initial_dict[key] = value_list
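# A toy illustration of the merge semantics (extend rather than overwrite),
# needed because a node such as 'd' below can have several parents:
#   d = {'d': ['c']}
#   _update_dict(d, {'d': ['f'], 'e': ['c']})
#   d == {'d': ['c', 'f'], 'e': ['c']}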

def _build_plain_hierarchy(hierarchy, skip_root=False):
  """Expands a tree hierarchy representation into parent-child dictionaries.

  Args:
    hierarchy: labels hierarchy as a parsed JSON dictionary.
    skip_root: if True, skips the root node (used when all classes in the
      hierarchy are collected under a virtual root node).

  Returns:
    keyed_parent - dictionary mapping each parent to all of its child nodes.
    keyed_child - dictionary mapping each child to all of its parent nodes.
    children - all children of the current node.
  """
  all_children = []
  all_keyed_parent = {}
  all_keyed_child = {}
  if 'Subcategory' in hierarchy:
    for node in hierarchy['Subcategory']:
      keyed_parent, keyed_child, children = _build_plain_hierarchy(node)
      # Update is not done through dict.update() since some children have
      # multiple parents in the hierarchy.
      _update_dict(all_keyed_parent, keyed_parent)
      _update_dict(all_keyed_child, keyed_child)
      all_children.extend(children)
  if not skip_root:
    all_keyed_parent[hierarchy['LabelName']] = all_children
    all_children = [hierarchy['LabelName']] + all_children
    for child, _ in all_keyed_child.iteritems():
      all_keyed_child[child].append(hierarchy['LabelName'])
    all_keyed_child[hierarchy['LabelName']] = []
  return all_keyed_parent, all_keyed_child, all_children
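
# A worked example, assuming `h` is the parsed toy hierarchy used in the tests
# further below (root 'a' with children 'b', 'c' and 'f'; 'd' is a child of
# both 'c' and 'f'):
#   keyed_parent, keyed_child, _ = _build_plain_hierarchy(h, skip_root=True)
#   keyed_parent == {'b': [], 'c': ['d', 'e'], 'd': [], 'e': [], 'f': ['d']}
#   keyed_child  == {'b': [], 'c': [], 'd': ['c', 'f'], 'e': ['c'], 'f': []}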

class OIDHierarchicalLabelsExpansion(object):
  """Main class to perform hierarchical expansion of labels."""

  def __init__(self, hierarchy):
    """Constructor.

    Args:
      hierarchy: labels hierarchy as a parsed JSON dictionary.
    """
    self._hierarchy_keyed_parent, self._hierarchy_keyed_child, _ = (
        _build_plain_hierarchy(hierarchy, skip_root=True))

  def expand_boxes_from_csv(self, csv_row):
    """Expands a row containing bounding boxes from the CSV file.

    Args:
      csv_row: a single row of the Open Images released groundtruth file.

    Returns:
      a list of strings (including the initial row) corresponding to the
      groundtruth expanded to multiple annotations for evaluation with the
      Open Images Challenge 2018 metric.
    """
    # Row header is expected to be exactly:
    # ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,
    # IsTruncated,IsGroupOf,IsDepiction,IsInside
    split_csv_row = csv_row.split(',')
    assert len(split_csv_row) == 13
    result = [csv_row]
    assert split_csv_row[2] in self._hierarchy_keyed_child
    parent_nodes = self._hierarchy_keyed_child[split_csv_row[2]]
    for parent_node in parent_nodes:
      split_csv_row[2] = parent_node
      result.append(','.join(split_csv_row))
    return result
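  # A worked example, assuming `h` is the toy hierarchy from the tests below:
  # a box labeled 'd' is replicated to each of its ancestors ('c' and 'f')
  # with identical coordinates:
  #   e = OIDHierarchicalLabelsExpansion(h)
  #   e.expand_boxes_from_csv('123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0')
  #   -> ['123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
  #       '123,xclick,c,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
  #       '123,xclick,f,1,0.2,0.3,0.1,0.2,1,1,0,0,0']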

  def expand_labels_from_csv(self, csv_row):
    """Expands a row containing an image-level label from the CSV file.

    Args:
      csv_row: a single row of the Open Images released groundtruth file.

    Returns:
      a list of strings (including the initial row) corresponding to the
      groundtruth expanded to multiple annotations for evaluation with the
      Open Images Challenge 2018 metric.
    """
    # Row header is expected to be exactly:
    # ImageID,Source,LabelName,Confidence
    split_csv_row = csv_row.split(',')
    assert len(split_csv_row) == 4
    result = [csv_row]
    if int(split_csv_row[3]) == 1:
      # A positively verified label also holds for all of its ancestors.
      assert split_csv_row[2] in self._hierarchy_keyed_child
      parent_nodes = self._hierarchy_keyed_child[split_csv_row[2]]
      for parent_node in parent_nodes:
        split_csv_row[2] = parent_node
        result.append(','.join(split_csv_row))
    else:
      # A negatively verified label also holds for all of its descendants.
      assert split_csv_row[2] in self._hierarchy_keyed_parent
      child_nodes = self._hierarchy_keyed_parent[split_csv_row[2]]
      for child_node in child_nodes:
        split_csv_row[2] = child_node
        result.append(','.join(split_csv_row))
    return result
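  # A worked example, again with the toy hierarchy `h`: a positively verified
  # label propagates up to its ancestors, while a negatively verified one
  # propagates down to its descendants:
  #   e.expand_labels_from_csv('124,verification,d,1')
  #   -> ['124,verification,d,1', '124,verification,c,1',
  #       '124,verification,f,1']
  #   e.expand_labels_from_csv('123,verification,c,0')
  #   -> ['123,verification,c,0', '123,verification,d,0',
  #       '123,verification,e,0']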

def main(argv):
  if len(argv) < 4:
    print """Missing arguments. \n
          Usage: ./hierarchical_labels_expansion <path to JSON hierarchy>
          <input csv file> <output csv file> [optional]labels_file"""
    return
  with open(argv[1]) as f:
    hierarchy = json.load(f)
  expansion_generator = OIDHierarchicalLabelsExpansion(hierarchy)
  labels_file = False
  if len(argv) > 4 and argv[4] == 'labels_file':
    labels_file = True
  with open(argv[2], 'r') as source:
    with open(argv[3], 'w') as target:
      header_skipped = False
      for line in source:
        if not header_skipped:
          header_skipped = True
          continue
        if labels_file:
          expanded_lines = expansion_generator.expand_labels_from_csv(line)
        else:
          expanded_lines = expansion_generator.expand_boxes_from_csv(line)
        target.writelines(expanded_lines)


if __name__ == '__main__':
  main(sys.argv)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the OpenImages label expansion (OIDHierarchicalLabelsExpansion)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.dataset_tools import oid_hierarchical_labels_expansion
def create_test_data():
  hierarchy = {
      'LabelName': 'a',
      'Subcategory': [{
          'LabelName': 'b'
      }, {
          'LabelName': 'c',
          'Subcategory': [{
              'LabelName': 'd'
          }, {
              'LabelName': 'e'
          }]
      }, {
          'LabelName': 'f',
          'Subcategory': [{
              'LabelName': 'd'
          },]
      }]
  }
  bbox_rows = [
      '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0',
      '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0'
  ]
  label_rows = [
      '123,verification,b,0', '123,verification,c,0', '124,verification,d,1'
  ]
  return hierarchy, bbox_rows, label_rows


class HierarchicalLabelsExpansionTest(tf.test.TestCase):

  def test_bbox_expansion(self):
    hierarchy, bbox_rows, _ = create_test_data()
    expansion_generator = (
        oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
            hierarchy))
    all_result_rows = []
    for row in bbox_rows:
      all_result_rows.extend(expansion_generator.expand_boxes_from_csv(row))
    self.assertItemsEqual([
        '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0',
        '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
        '123,xclick,f,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
        '123,xclick,c,1,0.2,0.3,0.1,0.2,1,1,0,0,0'
    ], all_result_rows)

  def test_labels_expansion(self):
    hierarchy, _, label_rows = create_test_data()
    expansion_generator = (
        oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
            hierarchy))
    all_result_rows = []
    for row in label_rows:
      all_result_rows.extend(expansion_generator.expand_labels_from_csv(row))
    self.assertItemsEqual([
        '123,verification,b,0', '123,verification,c,0', '123,verification,d,0',
        '123,verification,e,0', '124,verification,d,1', '124,verification,f,1',
        '124,verification,c,1'
    ], all_result_rows)


if __name__ == '__main__':
  tf.test.main()
@@ -41,24 +41,31 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
  filtered_data_frame = annotations_data_frame[
      annotations_data_frame.LabelName.isin(label_map)]
+  filtered_data_frame_boxes = filtered_data_frame[
+      ~filtered_data_frame.YMin.isnull()]
+  filtered_data_frame_labels = filtered_data_frame[
+      filtered_data_frame.YMin.isnull()]
  image_id = annotations_data_frame.ImageID.iloc[0]
  feature_map = {
      standard_fields.TfExampleFields.object_bbox_ymin:
-          dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()),
+          dataset_util.float_list_feature(
+              filtered_data_frame_boxes.YMin.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_xmin:
-          dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()),
+          dataset_util.float_list_feature(
+              filtered_data_frame_boxes.XMin.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_ymax:
-          dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()),
+          dataset_util.float_list_feature(
+              filtered_data_frame_boxes.YMax.as_matrix()),
      standard_fields.TfExampleFields.object_bbox_xmax:
-          dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()),
+          dataset_util.float_list_feature(
+              filtered_data_frame_boxes.XMax.as_matrix()),
      standard_fields.TfExampleFields.object_class_text:
          dataset_util.bytes_list_feature(
-              filtered_data_frame.LabelName.as_matrix()),
+              filtered_data_frame_boxes.LabelName.as_matrix()),
      standard_fields.TfExampleFields.object_class_label:
          dataset_util.int64_list_feature(
-              filtered_data_frame.LabelName.map(lambda x: label_map[x])
+              filtered_data_frame_boxes.LabelName.map(lambda x: label_map[x])
              .as_matrix()),
      standard_fields.TfExampleFields.filename:
          dataset_util.bytes_feature('{}.jpg'.format(image_id)),
@@ -71,43 +78,29 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
  if 'IsGroupOf' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_group_of] = dataset_util.int64_list_feature(
-                    filtered_data_frame.IsGroupOf.as_matrix().astype(int))
+                    filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int))
  if 'IsOccluded' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_occluded] = dataset_util.int64_list_feature(
-                    filtered_data_frame.IsOccluded.as_matrix().astype(int))
+                    filtered_data_frame_boxes.IsOccluded.as_matrix().astype(
+                        int))
  if 'IsTruncated' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_truncated] = dataset_util.int64_list_feature(
-                    filtered_data_frame.IsTruncated.as_matrix().astype(int))
+                    filtered_data_frame_boxes.IsTruncated.as_matrix().astype(
+                        int))
  if 'IsDepiction' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_depiction] = dataset_util.int64_list_feature(
-                    filtered_data_frame.IsDepiction.as_matrix().astype(int))
+                    filtered_data_frame_boxes.IsDepiction.as_matrix().astype(
+                        int))
+  if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
+    feature_map[standard_fields.TfExampleFields.
+                image_class_label] = dataset_util.int64_list_feature(
+                    filtered_data_frame_labels.LabelName.map(
+                        lambda x: label_map[x]).as_matrix())
+    feature_map[standard_fields.TfExampleFields.
+                image_class_text] = dataset_util.bytes_list_feature(
+                    filtered_data_frame_labels.LabelName.as_matrix())
  return tf.train.Example(features=tf.train.Features(feature=feature_map))
-
-
-def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
-  """Opens all TFRecord shards for writing and adds them to an exit stack.
-
-  Args:
-    exit_stack: A contextlib2.ExitStack used to automatically close the
-      TFRecords opened in this function.
-    base_path: The base path for all shards
-    num_shards: The number of shards
-
-  Returns:
-    The list of opened TFRecords. Position k in the list corresponds to shard k.
-  """
-  tf_record_output_filenames = [
-      '{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
-      for idx in range(num_shards)
-  ]
-  tfrecords = [
-      exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
-      for file_name in tf_record_output_filenames
-  ]
-  return tfrecords
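The YMin.isnull() split above works because rows that come from the image-level labels CSV never carry box coordinates, so those cells become NaN after the pd.concat in create_oid_tf_record.py. A minimal sketch, with hypothetical values:

import pandas as pd

frame = pd.DataFrame({
    'ImageID': ['i1', 'i1'],
    'LabelName': ['b', 'c'],
    'YMin': [0.3, None],  # None becomes NaN: the 'c' row is an image-level label.
})
boxes = frame[~frame.YMin.isnull()]   # keeps only the box annotation row
labels = frame[frame.YMin.isnull()]   # keeps only the image-level label row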
@@ -14,8 +14,6 @@
# ==============================================================================
"""Tests for oid_tfrecord_creation.py."""

-import os
-import contextlib2
import pandas as pd
import tensorflow as tf
@@ -24,16 +22,17 @@ from object_detection.dataset_tools import oid_tfrecord_creation
def create_test_data():
  data = {
-      'ImageID': ['i1', 'i1', 'i1', 'i1', 'i2', 'i2'],
-      'LabelName': ['a', 'a', 'b', 'b', 'b', 'c'],
-      'YMin': [0.3, 0.6, 0.8, 0.1, 0.0, 0.0],
-      'XMin': [0.1, 0.3, 0.7, 0.0, 0.1, 0.1],
-      'XMax': [0.2, 0.3, 0.8, 0.5, 0.9, 0.9],
-      'YMax': [0.3, 0.6, 1, 0.8, 0.8, 0.8],
-      'IsOccluded': [0, 1, 1, 0, 0, 0],
-      'IsTruncated': [0, 0, 0, 1, 0, 0],
-      'IsGroupOf': [0, 0, 0, 0, 0, 1],
-      'IsDepiction': [1, 0, 0, 0, 0, 0],
+      'ImageID': ['i1', 'i1', 'i1', 'i1', 'i1', 'i2', 'i2'],
+      'LabelName': ['a', 'a', 'b', 'b', 'c', 'b', 'c'],
+      'YMin': [0.3, 0.6, 0.8, 0.1, None, 0.0, 0.0],
+      'XMin': [0.1, 0.3, 0.7, 0.0, None, 0.1, 0.1],
+      'XMax': [0.2, 0.3, 0.8, 0.5, None, 0.9, 0.9],
+      'YMax': [0.3, 0.6, 1, 0.8, None, 0.8, 0.8],
+      'IsOccluded': [0, 1, 1, 0, None, 0, 0],
+      'IsTruncated': [0, 0, 0, 1, None, 0, 0],
+      'IsGroupOf': [0, 0, 0, 0, None, 0, 1],
+      'IsDepiction': [1, 0, 0, 0, None, 0, 0],
+      'ConfidenceImageLabel': [None, None, None, None, 0, None, None],
  }
  df = pd.DataFrame(data=data)
  label_map = {'a': 0, 'b': 1, 'c': 2}
@@ -47,7 +46,8 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
    tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
        df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
-    self.assertProtoEquals("""
+    self.assertProtoEquals(
+        """
      features {
        feature {
          key: "image/encoded"
@@ -87,7 +87,13 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
          value { int64_list { value: [0, 1, 1, 0] } } }
        feature {
          key: "image/object/truncated"
-          value { int64_list { value: [0, 0, 0, 1] } } } }
+          value { int64_list { value: [0, 0, 0, 1] } } }
+        feature {
+          key: "image/class/label"
+          value { int64_list { value: [2] } } }
+        feature {
+          key: "image/class/text"
+          value { bytes_list { value: ["c"] } } } }
    """, tf_example)

  def test_no_attributes(self):
@@ -97,6 +103,7 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
    del df['IsGroupOf']
    del df['IsOccluded']
    del df['IsTruncated']
+    del df['ConfidenceImageLabel']

    tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
        df[df.ImageID == 'i2'], label_map, 'encoded_image_test')
@@ -138,7 +145,8 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
    tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
        df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
-    self.assertProtoEquals("""
+    self.assertProtoEquals(
+        """
      features {
        feature {
          key: "image/encoded"
@@ -178,26 +186,15 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
          value { int64_list { value: [0, 1] } } }
        feature {
          key: "image/object/truncated"
-          value { int64_list { value: [0, 0] } } } }
+          value { int64_list { value: [0, 0] } } }
+        feature {
+          key: "image/class/label"
+          value { int64_list { } } }
+        feature {
+          key: "image/class/text"
+          value { bytes_list { } } } }
    """, tf_example)

-
-class OpenOutputTfrecordsTests(tf.test.TestCase):
-
-  def test_sharded_tfrecord_writes(self):
-    with contextlib2.ExitStack() as tf_record_close_stack:
-      output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
-          tf_record_close_stack,
-          os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10)
-      for idx in range(10):
-        output_tfrecords[idx].write('test_{}'.format(idx))
-      for idx in range(10):
-        tf_record_path = '{}-{:05d}-of-00010'.format(
-            os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
-        records = list(tf.python_io.tf_record_iterator(tf_record_path))
-        self.assertAllEqual(records, ['test_{}'.format(idx)])

if __name__ == '__main__':
  tf.test.main()