Unverified Commit 97760186 authored by Jonathan Huang, committed by GitHub

Merge pull request #4460 from pkulzc/master

Release evaluation code for OI Challenge 2018 and minor fixes. 
parents ed901b73 a703fc0c
......@@ -56,15 +56,26 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
else:
height, width = spatial_image_shape # pylint: disable=unpacking-non-sequence
num_additional_channels = 0
if fields.InputDataFields.image_additional_channels in dataset.output_shapes:
num_additional_channels = dataset.output_shapes[
fields.InputDataFields.image_additional_channels].dims[2].value
padding_shapes = {
fields.InputDataFields.image: [height, width, 3],
# Additional channels are merged before batching.
fields.InputDataFields.image: [
height, width, 3 + num_additional_channels
],
fields.InputDataFields.image_additional_channels: [
height, width, num_additional_channels
],
fields.InputDataFields.source_id: [],
fields.InputDataFields.filename: [],
fields.InputDataFields.key: [],
fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
fields.InputDataFields.groundtruth_instance_masks: [max_num_boxes, height,
width],
fields.InputDataFields.groundtruth_instance_masks: [
max_num_boxes, height, width
],
fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
fields.InputDataFields.groundtruth_area: [max_num_boxes],
......@@ -74,7 +85,8 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
fields.InputDataFields.true_image_shape: [3],
fields.InputDataFields.multiclass_scores: [
max_num_boxes, num_classes + 1 if num_classes is not None else None],
max_num_boxes, num_classes + 1 if num_classes is not None else None
],
}
# Determine whether groundtruth_classes are integers or one-hot encodings, and
# apply batching appropriately.
......@@ -90,7 +102,9 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
'rank 2 tensor (one-hot encodings)')
if fields.InputDataFields.original_image in dataset.output_shapes:
padding_shapes[fields.InputDataFields.original_image] = [None, None, 3]
padding_shapes[fields.InputDataFields.original_image] = [
None, None, 3 + num_additional_channels
]
if fields.InputDataFields.groundtruth_keypoints in dataset.output_shapes:
tensor_shape = dataset.output_shapes[fields.InputDataFields.
groundtruth_keypoints]
......@@ -108,9 +122,13 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
for tensor_key, _ in dataset.output_shapes.items()}
def build(input_reader_config, transform_input_data_fn=None,
batch_size=None, max_num_boxes=None, num_classes=None,
spatial_image_shape=None):
def build(input_reader_config,
transform_input_data_fn=None,
batch_size=None,
max_num_boxes=None,
num_classes=None,
spatial_image_shape=None,
num_additional_channels=0):
"""Builds a tf.data.Dataset.
Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all
......@@ -128,6 +146,7 @@ def build(input_reader_config, transform_input_data_fn=None,
spatial_image_shape: A list of two integers of the form [height, width]
containing expected spatial shape of the image after applying
transform_input_data_fn. If None, will use dynamic shapes.
num_additional_channels: Number of additional channels to use in the input.
Returns:
A tf.data.Dataset based on the input_reader_config.
......@@ -152,7 +171,9 @@ def build(input_reader_config, transform_input_data_fn=None,
decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=input_reader_config.load_instance_masks,
instance_mask_type=input_reader_config.mask_type,
label_map_proto_file=label_map_proto_file)
label_map_proto_file=label_map_proto_file,
use_display_name=input_reader_config.use_display_name,
num_additional_channels=num_additional_channels)
def process_fn(value):
processed = decoder.decode(value)
......
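A hedged usage sketch of the extended builder (the record path is illustrative); it mirrors the new test added below, where two extra channels are merged into the image, giving a batched tensor of shape [2, height, width, 5]:

from google.protobuf import text_format
from object_detection.builders import dataset_builder
from object_detection.protos import input_reader_pb2

input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge("""
  shuffle: false
  num_readers: 1
  tf_record_input_reader { input_path: '/path/to/records' }
""", input_reader_proto)
# Additional channels are decoded separately, then merged before batching.
dataset = dataset_builder.build(
    input_reader_proto, batch_size=2, num_additional_channels=2)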
......@@ -30,49 +30,50 @@ from object_detection.utils import dataset_util
class DatasetBuilderTest(tf.test.TestCase):
def create_tf_record(self):
def create_tf_record(self, has_additional_channels=False):
path = os.path.join(self.get_temp_dir(), 'tfrecord')
writer = tf.python_io.TFRecordWriter(path)
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
additional_channels_tensor = np.random.randint(
255, size=(4, 5, 1)).astype(np.uint8)
flat_mask = (4 * 5) * [1.0]
with self.test_session():
encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
example = example_pb2.Example(
features=feature_pb2.Features(
feature={
encoded_additional_channels_jpeg = tf.image.encode_jpeg(
tf.constant(additional_channels_tensor)).eval()
features = {
'image/encoded':
feature_pb2.Feature(
bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
'image/format':
feature_pb2.Feature(
bytes_list=feature_pb2.BytesList(
value=['jpeg'.encode('utf-8')])),
bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])
),
'image/height':
feature_pb2.Feature(
int64_list=feature_pb2.Int64List(value=[4])),
feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[4])),
'image/width':
feature_pb2.Feature(
int64_list=feature_pb2.Int64List(value=[5])),
feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[5])),
'image/object/bbox/xmin':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[0.0])),
feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
'image/object/bbox/xmax':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[1.0])),
feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
'image/object/bbox/ymin':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[0.0])),
feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
'image/object/bbox/ymax':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[1.0])),
feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
'image/object/class/label':
feature_pb2.Feature(
int64_list=feature_pb2.Int64List(value=[2])),
feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[2])),
'image/object/mask':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=flat_mask)),
}))
}
if has_additional_channels:
features['image/additional_channels/encoded'] = feature_pb2.Feature(
bytes_list=feature_pb2.BytesList(
value=[encoded_additional_channels_jpeg] * 2))
example = example_pb2.Example(
features=feature_pb2.Features(feature=features))
writer.write(example.SerializeToString())
writer.close()
......@@ -218,6 +219,31 @@ class DatasetBuilderTest(tf.test.TestCase):
[2, 2, 4, 5],
output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
def test_build_tf_record_input_reader_with_additional_channels(self):
tf_record_path = self.create_tf_record(has_additional_channels=True)
input_reader_text_proto = """
shuffle: false
num_readers: 1
tf_record_input_reader {{
input_path: '{0}'
}}
""".format(tf_record_path)
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = dataset_util.make_initializable_iterator(
dataset_builder.build(
input_reader_proto, batch_size=2,
num_additional_channels=2)).get_next()
sv = tf.train.Supervisor(logdir=self.get_temp_dir())
with sv.prepare_or_wait_for_session() as sess:
sv.start_queue_runners(sess)
output_dict = sess.run(tensor_dict)
self.assertEquals((2, 4, 5, 5),
output_dict[fields.InputDataFields.image].shape)
def test_raises_error_with_no_input_paths(self):
input_reader_text_proto = """
shuffle: false
......
......@@ -79,12 +79,17 @@ def build(image_resizer_config):
keep_aspect_ratio_config.max_dimension):
raise ValueError('min_dimension > max_dimension')
method = _tf_resize_method(keep_aspect_ratio_config.resize_method)
per_channel_pad_value = (0, 0, 0)
if keep_aspect_ratio_config.per_channel_pad_value:
per_channel_pad_value = tuple(keep_aspect_ratio_config.
per_channel_pad_value)
image_resizer_fn = functools.partial(
preprocessor.resize_to_range,
min_dimension=keep_aspect_ratio_config.min_dimension,
max_dimension=keep_aspect_ratio_config.max_dimension,
method=method,
pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension)
pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension,
per_channel_pad_value=per_channel_pad_value)
if not keep_aspect_ratio_config.convert_to_grayscale:
return image_resizer_fn
elif image_resizer_oneof == 'fixed_shape_resizer':
......
......@@ -52,6 +52,9 @@ class ImageResizerBuilderTest(tf.test.TestCase):
min_dimension: 10
max_dimension: 20
pad_to_max_dimension: true
per_channel_pad_value: 3
per_channel_pad_value: 4
per_channel_pad_value: 5
}
"""
input_shape = (50, 25, 3)
......
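For reference, a hedged sketch of building a resizer with the new field (the pad values here are illustrative ImageNet-style channel means):

from google.protobuf import text_format
from object_detection.builders import image_resizer_builder
from object_detection.protos import image_resizer_pb2

config = image_resizer_pb2.ImageResizer()
text_format.Merge("""
  keep_aspect_ratio_resizer {
    min_dimension: 10
    max_dimension: 20
    pad_to_max_dimension: true
    per_channel_pad_value: 123
    per_channel_pad_value: 116
    per_channel_pad_value: 103
  }
""", config)
# The repeated proto field becomes the per_channel_pad_value tuple above.
image_resizer_fn = image_resizer_builder.build(config)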
......@@ -778,7 +778,7 @@ def to_absolute_coordinates(boxlist,
height,
width,
check_range=True,
maximum_normalized_coordinate=1.01,
maximum_normalized_coordinate=1.1,
scope=None):
"""Converts normalized box coordinates to absolute pixel coordinates.
......@@ -792,7 +792,7 @@ def to_absolute_coordinates(boxlist,
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
maximum_normalized_coordinate: Maximum coordinate value to be considered
as normalized, default to 1.01.
as normalized, default to 1.1.
scope: name scope.
Returns:
......
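A hedged sketch of what the relaxed default permits: coordinates up to 1.1 are still treated as normalized, so mild overshoot (e.g. from augmentation jitter) converts cleanly, while the new test below checks that values beyond 1.1 trip the assertion:

import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops

boxes = box_list.BoxList(tf.constant([[0., 0., 1.05, 1.05]], tf.float32))
# 1.05 <= 1.1, so this converts to [0, 0, 105, 105] without raising.
absolute = box_list_ops.to_absolute_coordinates(boxes, height=100, width=100)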
......@@ -931,6 +931,21 @@ class CoordinatesConversionTest(tf.test.TestCase):
out = sess.run(boxlist.get())
self.assertAllClose(out, coordinates)
def test_to_absolute_coordinates_maximum_coordinate_check(self):
coordinates = tf.constant([[0, 0, 1.2, 1.2],
[0.25, 0.25, 0.75, 0.75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
absolute_boxlist = box_list_ops.to_absolute_coordinates(
boxlist,
tf.shape(img)[1],
tf.shape(img)[2],
maximum_normalized_coordinate=1.1)
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(absolute_boxlist.get())
class BoxRefinementTest(tf.test.TestCase):
......
......@@ -79,10 +79,12 @@ class BoxPredictor(object):
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
......@@ -120,10 +122,12 @@ class BoxPredictor(object):
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
......@@ -765,6 +769,13 @@ class ConvolutionalBoxPredictor(BoxPredictor):
}
# TODO(rathodv): Replace with slim.arg_scope_func_key once its available
# externally.
def _arg_scope_func_key(op):
"""Returns a key that can be used to index arg_scope dictionary."""
return getattr(op, '_key_op', str(op))
# TODO(rathodv): Merge the implementation with ConvolutionalBoxPredictor above
# since they are very similar.
class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
......@@ -773,8 +784,12 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
Defines the box predictor as defined in
https://arxiv.org/abs/1708.02002. This class differs from
ConvolutionalBoxPredictor in that it shares weights and biases while
predicting from different feature maps. Separate multi-layer towers are
constructed for the box encoding and class predictors respectively.
predicting from different feature maps. However, batch_norm parameters are not
shared because the statistics of the activations vary among the different
feature maps.
Also note that separate multi-layer towers are constructed for the box
encoding and class predictors respectively.
"""
def __init__(self,
......@@ -833,14 +848,15 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
Returns:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
[batch_size, num_anchors_i, code_size] representing the location of
the objects. Each entry in the list corresponds to a feature map in the
input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
Raises:
ValueError: If the image feature maps do not have the same number of
channels or if the num predictions per locations is differs between the
......@@ -858,15 +874,18 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
'channels, found: {}'.format(feature_channels))
box_encodings_list = []
class_predictions_list = []
for (image_feature, num_predictions_per_location) in zip(
image_features, num_predictions_per_location_list):
for feature_index, (image_feature,
num_predictions_per_location) in enumerate(
zip(image_features,
num_predictions_per_location_list)):
# Add a slot for the background class.
with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
reuse=tf.AUTO_REUSE):
num_class_slots = self.num_classes + 1
box_encodings_net = image_feature
class_predictions_net = image_feature
with slim.arg_scope(self._conv_hyperparams_fn()):
with slim.arg_scope(self._conv_hyperparams_fn()) as sc:
apply_batch_norm = _arg_scope_func_key(slim.batch_norm) in sc
for i in range(self._num_layers_before_predictor):
box_encodings_net = slim.conv2d(
box_encodings_net,
......@@ -874,14 +893,22 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
[self._kernel_size, self._kernel_size],
stride=1,
padding='SAME',
scope='BoxEncodingPredictionTower/conv2d_{}'.format(i))
activation_fn=None,
normalizer_fn=(tf.identity if apply_batch_norm else None),
scope='BoxPredictionTower/conv2d_{}'.format(i))
if apply_batch_norm:
box_encodings_net = slim.batch_norm(
box_encodings_net,
scope='BoxPredictionTower/conv2d_{}/BatchNorm/feature_{}'.
format(i, feature_index))
box_encodings_net = tf.nn.relu6(box_encodings_net)
box_encodings = slim.conv2d(
box_encodings_net,
num_predictions_per_location * self._box_code_size,
[self._kernel_size, self._kernel_size],
activation_fn=None, stride=1, padding='SAME',
normalizer_fn=None,
scope='BoxEncodingPredictor')
scope='BoxPredictor')
for i in range(self._num_layers_before_predictor):
class_predictions_net = slim.conv2d(
......@@ -890,7 +917,15 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
[self._kernel_size, self._kernel_size],
stride=1,
padding='SAME',
activation_fn=None,
normalizer_fn=(tf.identity if apply_batch_norm else None),
scope='ClassPredictionTower/conv2d_{}'.format(i))
if apply_batch_norm:
class_predictions_net = slim.batch_norm(
class_predictions_net,
scope='ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}'
.format(i, feature_index))
class_predictions_net = tf.nn.relu6(class_predictions_net)
if self._use_dropout:
class_predictions_net = slim.dropout(
class_predictions_net, keep_prob=self._dropout_keep_prob)
......@@ -912,7 +947,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
combined_feature_map_shape[1] *
combined_feature_map_shape[2] *
num_predictions_per_location,
1, self._box_code_size]))
self._box_code_size]))
box_encodings_list.append(box_encodings)
class_predictions_with_background = tf.reshape(
class_predictions_with_background,
......
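To make the variable-sharing scheme concrete, here is a minimal sketch (not the library's code) of how tf.AUTO_REUSE shares the conv weights across feature maps while the feature-indexed scope keeps batch norm private to each map:

import tensorflow as tf
slim = tf.contrib.slim

def shared_conv_private_bn(feature, feature_index):
  with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
                         reuse=tf.AUTO_REUSE):
    # Conv weights are reused for every feature map...
    net = slim.conv2d(feature, 32, [3, 3], stride=1, padding='SAME',
                      activation_fn=None, normalizer_fn=None,
                      scope='BoxPredictionTower/conv2d_0')
    # ...but each feature map gets its own beta and moving statistics.
    net = slim.batch_norm(
        net,
        scope='BoxPredictionTower/conv2d_0/BatchNorm/feature_{}'.format(
            feature_index))
    return tf.nn.relu6(net)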
......@@ -442,6 +442,24 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def _build_conv_arg_scope_no_batch_norm(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_get_boxes_for_five_aspect_ratios_per_location(self):
def graph_fn(image_features):
......@@ -463,7 +481,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, objectness_predictions) = self.execute(
graph_fn, [image_features])
self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
self.assertAllEqual(box_encodings.shape, [4, 320, 4])
self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])
def test_bias_predictions_to_background_with_sigmoid_score_conversion(self):
......@@ -512,7 +530,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, class_predictions_with_background) = self.execute(
graph_fn, [image_features])
self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
self.assertAllEqual(box_encodings.shape, [4, 320, 4])
self.assertAllEqual(class_predictions_with_background.shape,
[4, 320, num_classes_without_background+1])
......@@ -543,11 +561,12 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, class_predictions_with_background) = self.execute(
graph_fn, [image_features1, image_features2])
self.assertAllEqual(box_encodings.shape, [4, 640, 1, 4])
self.assertAllEqual(box_encodings.shape, [4, 640, 4])
self.assertAllEqual(class_predictions_with_background.shape,
[4, 640, num_classes_without_background+1])
def test_predictions_from_multiple_feature_maps_share_weights(self):
def test_predictions_from_multiple_feature_maps_share_weights_not_batchnorm(
self):
num_classes_without_background = 6
def graph_fn(image_features1, image_features2):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
......@@ -574,26 +593,95 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
expected_variable_set = set([
# Box prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_0/weights'),
'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_0/BatchNorm/beta'),
'BoxPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_1/weights'),
'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_1/BatchNorm/beta'),
'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
# Box prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/biases'),
# Class prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/BatchNorm/beta'),
'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/BatchNorm/beta'),
'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
# Class prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/biases')])
self.assertEqual(expected_variable_set, actual_variable_set)
def test_no_batchnorm_params_when_batchnorm_is_not_configured(self):
num_classes_without_background = 6
def graph_fn(image_features1, image_features2):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams_fn=self._build_conv_arg_scope_no_batch_norm(),
depth=32,
num_layers_before_predictor=2,
box_code_size=4)
box_predictions = conv_box_predictor.predict(
[image_features1, image_features2],
num_predictions_per_location=[5, 5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background)
with self.test_session(graph=tf.Graph()):
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
expected_variable_set = set([
# Box prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/biases'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictor/weights'),
'BoxPredictionTower/conv2d_1/biases'),
# Box prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictor/biases'),
'BoxPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/biases'),
# Class prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/biases'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/biases'),
# Class prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
......@@ -628,7 +716,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
......
......@@ -2128,7 +2128,8 @@ def resize_to_range(image,
max_dimension=None,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False,
pad_to_max_dimension=False):
pad_to_max_dimension=False,
per_channel_pad_value=(0, 0, 0)):
"""Resizes an image so its dimensions are within the provided value.
The output size can be described by two cases:
......@@ -2153,6 +2154,8 @@ def resize_to_range(image,
so the resulting image is of the spatial size
[max_dimension, max_dimension]. If masks are included they are padded
similarly.
per_channel_pad_value: A tuple of per-channel scalar values to use for
padding. Defaults to zeros.
Returns:
Note that the position of the resized_image_shape changes based on whether
......@@ -2181,8 +2184,20 @@ def resize_to_range(image,
image, new_size[:-1], method=method, align_corners=align_corners)
if pad_to_max_dimension:
new_image = tf.image.pad_to_bounding_box(
new_image, 0, 0, max_dimension, max_dimension)
channels = tf.unstack(new_image, axis=2)
if len(channels) != len(per_channel_pad_value):
raise ValueError('Number of channels must be equal to the length of '
'per-channel pad value.')
new_image = tf.stack(
[
tf.pad(
channels[i], [[0, max_dimension - new_size[0]],
[0, max_dimension - new_size[1]]],
constant_values=per_channel_pad_value[i])
for i in range(len(channels))
],
axis=2)
new_image.set_shape([max_dimension, max_dimension, 3])
result = [new_image]
if masks is not None:
......
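A standalone sketch (TF 1.x; shapes and pad values are illustrative) of the unstack/pad/stack scheme used above:

import tensorflow as tf

image = tf.ones([2, 3, 3])  # [height=2, width=3, channels=3]
max_dimension = 4
per_channel_pad_value = (123.68, 116.779, 103.939)

channels = tf.unstack(image, axis=2)
padded = tf.stack(
    [tf.pad(channels[i],
            [[0, max_dimension - 2], [0, max_dimension - 3]],
            constant_values=per_channel_pad_value[i])
     for i in range(len(channels))],
    axis=2)  # shape [4, 4, 3]; each channel is padded with its own constant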
......@@ -2316,6 +2316,46 @@ class PreprocessorTest(tf.test.TestCase):
np.random.randn(*in_shape)})
self.assertAllEqual(out_image_shape, expected_shape)
def testResizeToRangeWithPadToMaxDimensionReturnsCorrectShapes(self):
in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
min_dim = 50
max_dim = 100
expected_shape_list = [[100, 100, 3], [100, 100, 3], [100, 100, 3]]
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
out_image, _ = preprocessor.resize_to_range(
in_image,
min_dimension=min_dim,
max_dimension=max_dim,
pad_to_max_dimension=True)
self.assertAllEqual(out_image.shape.as_list(), expected_shape)
out_image_shape = tf.shape(out_image)
with self.test_session() as sess:
out_image_shape = sess.run(
out_image_shape, feed_dict={in_image: np.random.randn(*in_shape)})
self.assertAllEqual(out_image_shape, expected_shape)
def testResizeToRangeWithPadToMaxDimensionReturnsCorrectTensor(self):
in_image_np = np.array([[[0, 1, 2]]], np.float32)
ex_image_np = np.array(
[[[0, 1, 2], [123.68, 116.779, 103.939]],
[[123.68, 116.779, 103.939], [123.68, 116.779, 103.939]]], np.float32)
min_dim = 1
max_dim = 2
in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
out_image, _ = preprocessor.resize_to_range(
in_image,
min_dimension=min_dim,
max_dimension=max_dim,
pad_to_max_dimension=True,
per_channel_pad_value=(123.68, 116.779, 103.939))
with self.test_session() as sess:
out_image_np = sess.run(out_image, feed_dict={in_image: in_image_np})
self.assertAllClose(ex_image_np, out_image_np)
def testResizeToRangeWithMasksPreservesStaticSpatialShape(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
......
......@@ -34,6 +34,7 @@ class InputDataFields(object):
Attributes:
image: image.
image_additional_channels: additional channels.
original_image: image in the original input size.
key: unique key corresponding to image.
source_id: source of the original image.
......@@ -66,6 +67,7 @@ class InputDataFields(object):
multiclass_scores: the label score per class for each box.
"""
image = 'image'
image_additional_channels = 'image_additional_channels'
original_image = 'original_image'
key = 'key'
source_id = 'source_id'
......@@ -161,6 +163,8 @@ class TfExampleFields(object):
height: height of image in pixels, e.g. 462
width: width of image in pixels, e.g. 581
source_id: original source of the image
image_class_text: image-level label in text format
image_class_label: image-level label in numerical format
object_class_text: labels in text format, e.g. ["person", "cat"]
object_class_label: labels in numbers, e.g. [16, 8]
object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
......@@ -195,6 +199,8 @@ class TfExampleFields(object):
height = 'image/height'
width = 'image/width'
source_id = 'image/source_id'
image_class_text = 'image/class/text'
image_class_label = 'image/class/label'
object_class_text = 'image/object/class/text'
object_class_label = 'image/object/class/label'
object_bbox_ymin = 'image/object/bbox/ymin'
......
......@@ -112,7 +112,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
label_map_proto_file=None,
use_display_name=False,
dct_method='',
num_keypoints=0):
num_keypoints=0,
num_additional_channels=0):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
......@@ -133,6 +134,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
example, the jpeg library does not have that specific option.
num_keypoints: the number of keypoints per object.
num_additional_channels: how many additional channels to use.
Raises:
ValueError: If `instance_mask_type` option is not one of
......@@ -178,15 +180,28 @@ class TfExampleDecoder(data_decoder.DataDecoder):
'image/object/weight':
tf.VarLenFeature(tf.float32),
}
# We are checking `dct_method` instead of passing it directly in order to
# ensure TF version 1.6 compatibility.
if dct_method:
image = slim_example_decoder.Image(
image_key='image/encoded',
format_key='image/format',
channels=3,
dct_method=dct_method)
additional_channel_image = slim_example_decoder.Image(
image_key='image/additional_channels/encoded',
format_key='image/format',
channels=1,
repeated=True,
dct_method=dct_method)
else:
image = slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3)
additional_channel_image = slim_example_decoder.Image(
image_key='image/additional_channels/encoded',
format_key='image/format',
channels=1,
repeated=True)
self.items_to_handlers = {
fields.InputDataFields.image:
image,
......@@ -211,6 +226,13 @@ class TfExampleDecoder(data_decoder.DataDecoder):
fields.InputDataFields.groundtruth_weights: (
slim_example_decoder.Tensor('image/object/weight')),
}
if num_additional_channels > 0:
self.keys_to_features[
'image/additional_channels/encoded'] = tf.FixedLenFeature(
(num_additional_channels,), tf.string)
self.items_to_handlers[
fields.InputDataFields.
image_additional_channels] = additional_channel_image
self._num_keypoints = num_keypoints
if num_keypoints > 0:
self.keys_to_features['image/object/keypoint/x'] = (
......@@ -294,6 +316,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
[None] indicating if the boxes enclose a crowd.
Optional:
fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
is width; 3rd dim is the number of additional channels.
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
......@@ -316,6 +341,12 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
if fields.InputDataFields.image_additional_channels in tensor_dict:
channels = tensor_dict[fields.InputDataFields.image_additional_channels]
channels = tf.squeeze(channels, axis=3)
channels = tf.transpose(channels, perm=[1, 2, 0])
tensor_dict[fields.InputDataFields.image_additional_channels] = channels
def default_groundtruth_weights():
return tf.ones(
[tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
......
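A hedged decoding sketch using the new argument; serialized_example stands in for a serialized tf.Example string:

import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder

serialized_example = tf.placeholder(tf.string, shape=[])
decoder = tf_example_decoder.TfExampleDecoder(num_additional_channels=2)
tensor_dict = decoder.decode(serialized_example)
# After the squeeze/transpose above, the extra channels surface as a
# [height, width, 2] uint8 tensor.
channels = tensor_dict[fields.InputDataFields.image_additional_channels]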
......@@ -23,6 +23,7 @@ from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import parsing_ops
......@@ -72,10 +73,41 @@ class TfExampleDecoderTest(tf.test.TestCase):
def _BytesFeatureFromList(self, ndarray):
values = ndarray.flatten().tolist()
for i in range(len(values)):
values[i] = values[i].encode('utf-8')
return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=values))
def testDecodeAdditionalChannels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
additional_channel_tensor = np.random.randint(
256, size=(4, 5, 1)).astype(np.uint8)
encoded_additional_channel = self._EncodeImage(additional_channel_tensor)
decoded_additional_channel = self._DecodeImage(encoded_additional_channel)
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
self._BytesFeature(encoded_jpeg),
'image/additional_channels/encoded':
self._BytesFeatureFromList(
np.array([encoded_additional_channel] * 2)),
'image/format':
self._BytesFeature('jpeg'),
'image/source_id':
self._BytesFeature('image_id'),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
num_additional_channels=2)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
np.concatenate([decoded_additional_channel] * 2, axis=2),
tensor_dict[fields.InputDataFields.image_additional_channels])
def testDecodeExampleWithBranchedBackupHandler(self):
example1 = example_pb2.Example(
features=feature_pb2.Features(
......@@ -304,6 +336,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual(
2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
@test_util.enable_c_shapes
def testDecodeKeypoint(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -331,7 +364,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
get_shape().as_list()), [None, 4])
self.assertAllEqual((tensor_dict[fields.InputDataFields.
groundtruth_keypoints].
get_shape().as_list()), [None, 3, 2])
get_shape().as_list()), [2, 3, 2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -376,6 +409,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights],
np.ones(2, dtype=np.float32))
@test_util.enable_c_shapes
def testDecodeObjectLabel(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -391,7 +425,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
[None])
[2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -522,6 +556,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual([3, 1],
tensor_dict[fields.InputDataFields.groundtruth_classes])
@test_util.enable_c_shapes
def testDecodeObjectArea(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -536,13 +571,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
get_shape().as_list()), [None])
get_shape().as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(object_area,
tensor_dict[fields.InputDataFields.groundtruth_area])
@test_util.enable_c_shapes
def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -558,7 +594,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
[None])
[2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -566,6 +602,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict[
fields.InputDataFields.groundtruth_is_crowd])
@test_util.enable_c_shapes
def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -581,7 +618,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
[None])
[2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -589,6 +626,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict[
fields.InputDataFields.groundtruth_difficult])
@test_util.enable_c_shapes
def testDecodeObjectGroupOf(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -605,7 +643,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_group_of].get_shape().as_list()),
[None])
[2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -637,6 +675,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
object_weights,
tensor_dict[fields.InputDataFields.groundtruth_weights])
@test_util.enable_c_shapes
def testDecodeInstanceSegmentation(self):
num_instances = 4
image_height = 5
......@@ -673,11 +712,11 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
get_shape().as_list()), [None, None, None])
get_shape().as_list()), [4, 5, 3])
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_classes].
get_shape().as_list()), [None])
get_shape().as_list()), [4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......
......@@ -16,7 +16,8 @@ r"""Creates TFRecords of Open Images dataset for object detection.
Example usage:
python object_detection/dataset_tools/create_oid_tf_record.py \
--input_annotations_csv=/path/to/input/annotations-human-bbox.csv \
--input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \
--input_image_label_annotations_csv=/path/to/input/annotations-label.csv \
--input_images_directory=/path/to/input/image_pixels_directory \
--input_label_map=/path/to/input/labels_bbox_545.labelmap \
--output_tf_record_path_prefix=/path/to/output/prefix.tfrecord
......@@ -27,7 +28,9 @@ https://github.com/openimages/dataset
This script will include every image found in the input_images_directory in the
output TFRecord, even if the image has no corresponding bounding box annotations
in the input_annotations_csv.
in the input_box_annotations_csv. If input_image_label_annotations_csv is
specified, it will add image-level labels as well. Note that the information of
whether a label is positively or negatively verified is NOT added to the
tfrecord.
"""
from __future__ import absolute_import
from __future__ import division
......@@ -40,13 +43,16 @@ import pandas as pd
import tensorflow as tf
from object_detection.dataset_tools import oid_tfrecord_creation
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import label_map_util
tf.flags.DEFINE_string('input_annotations_csv', None,
tf.flags.DEFINE_string('input_box_annotations_csv', None,
'Path to CSV containing image bounding box annotations')
tf.flags.DEFINE_string('input_images_directory', None,
'Directory containing the image pixels '
'downloaded from the OpenImages GitHub repository.')
tf.flags.DEFINE_string('input_image_label_annotations_csv', None,
'Path to CSV containing image-level labels annotations')
tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto')
tf.flags.DEFINE_string(
'output_tf_record_path_prefix', None,
......@@ -61,7 +67,7 @@ def main(_):
tf.logging.set_verbosity(tf.logging.INFO)
required_flags = [
'input_annotations_csv', 'input_images_directory', 'input_label_map',
'input_box_annotations_csv', 'input_images_directory', 'input_label_map',
'output_tf_record_path_prefix'
]
for flag_name in required_flags:
......@@ -69,17 +75,24 @@ def main(_):
raise ValueError('Flag --{} is required'.format(flag_name))
label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
all_annotations = pd.read_csv(FLAGS.input_annotations_csv)
all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
if FLAGS.input_image_label_annotations_csv:
all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv)
all_label_annotations.rename(
columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
else:
all_label_annotations = None
all_images = tf.gfile.Glob(
os.path.join(FLAGS.input_images_directory, '*.jpg'))
all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
all_annotations = pd.concat([all_annotations, all_image_ids])
all_annotations = pd.concat(
[all_box_annotations, all_image_ids, all_label_annotations])
tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))
with contextlib2.ExitStack() as tf_record_close_stack:
output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
FLAGS.num_shards)
......
......@@ -33,11 +33,13 @@ import os
import random
import re
import contextlib2
from lxml import etree
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
......@@ -52,6 +54,8 @@ flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
'in the latter case, the resulting files are much larger.')
flags.DEFINE_string('mask_type', 'png', 'How to represent instance '
'segmentation masks. Options are "png" or "numerical".')
flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards')
FLAGS = flags.FLAGS
......@@ -208,6 +212,7 @@ def dict_to_tf_example(data,
def create_tf_record(output_filename,
num_shards,
label_map_dict,
annotations_dir,
image_dir,
......@@ -218,6 +223,7 @@ def create_tf_record(output_filename,
Args:
output_filename: Path to where output file is saved.
num_shards: Number of shards for output file.
label_map_dict: The label map dictionary.
annotations_dir: Directory where annotation files are stored.
image_dir: Directory where image files are stored.
......@@ -227,7 +233,9 @@ def create_tf_record(output_filename,
mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
smaller file sizes.
"""
writer = tf.python_io.TFRecordWriter(output_filename)
with contextlib2.ExitStack() as tf_record_close_stack:
output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
tf_record_close_stack, output_filename, num_shards)
for idx, example in enumerate(examples):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(examples))
......@@ -250,12 +258,12 @@ def create_tf_record(output_filename,
image_dir,
faces_only=faces_only,
mask_type=mask_type)
writer.write(tf_example.SerializeToString())
if tf_example:
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
except ValueError:
logging.warning('Invalid example: %s, ignoring.', xml_path)
writer.close()
# TODO(derekjchow): Add test for pet/PASCAL main files.
def main(_):
......@@ -279,15 +287,16 @@ def main(_):
logging.info('%d training and %d validation examples.',
len(train_examples), len(val_examples))
train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
if FLAGS.faces_only:
train_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record')
if not FLAGS.faces_only:
train_output_path = os.path.join(FLAGS.output_dir,
'pet_train_with_masks.record')
'pets_fullbody_with_masks_train.record')
val_output_path = os.path.join(FLAGS.output_dir,
'pet_val_with_masks.record')
'pets_fullbody_with_masks_val.record')
create_tf_record(
train_output_path,
FLAGS.num_shards,
label_map_dict,
annotations_dir,
image_dir,
......@@ -296,6 +305,7 @@ def main(_):
mask_type=FLAGS.mask_type)
create_tf_record(
val_output_path,
FLAGS.num_shards,
label_map_dict,
annotations_dir,
image_dir,
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A class and executable to expand hierarchically image-level labels and boxes.
Example usage:
./hierarchical_labels_expansion <path to JSON hierarchy> <input csv file>
<output csv file> [optional]labels_file
"""
import json
import sys
def _update_dict(initial_dict, update):
"""Updates dictionary with update content.
Args:
initial_dict: initial dictionary.
update: updated dictionary.
"""
for key, value_list in update.iteritems():
if key in initial_dict:
initial_dict[key].extend(value_list)
else:
initial_dict[key] = value_list
def _build_plain_hierarchy(hierarchy, skip_root=False):
"""Expands tree hierarchy representation to parent-child dictionary.
Args:
hierarchy: labels hierarchy parsed from a JSON file.
skip_root: if True, skips the root node (used when all classes in the
hierarchy are collected under a single virtual node).
Returns:
keyed_parent - dictionary mapping each parent node to all of its children.
keyed_child - dictionary mapping each child node to all of its parents.
children - all children of the current node.
"""
all_children = []
all_keyed_parent = {}
all_keyed_child = {}
if 'Subcategory' in hierarchy:
for node in hierarchy['Subcategory']:
keyed_parent, keyed_child, children = _build_plain_hierarchy(node)
# Update is not done through dict.update() since some children have
# multiple parents in the hierarchy.
_update_dict(all_keyed_parent, keyed_parent)
_update_dict(all_keyed_child, keyed_child)
all_children.extend(children)
if not skip_root:
all_keyed_parent[hierarchy['LabelName']] = all_children
all_children = [hierarchy['LabelName']] + all_children
for child, _ in all_keyed_child.iteritems():
all_keyed_child[child].append(hierarchy['LabelName'])
all_keyed_child[hierarchy['LabelName']] = []
return all_keyed_parent, all_keyed_child, all_children
class OIDHierarchicalLabelsExpansion(object):
""" Main class to perform labels hierachical expansion."""
def __init__(self, hierarchy):
"""Constructor.
Args:
hierarchy: labels hierarchy parsed from a JSON file.
"""
self._hierarchy_keyed_parent, self._hierarchy_keyed_child, _ = (
_build_plain_hierarchy(hierarchy, skip_root=True))
def expand_boxes_from_csv(self, csv_row):
"""Expands a row containing bounding boxes from CSV file.
Args:
csv_row: a single row of Open Images released groundtruth file.
Returns:
a list of strings (including the initial row) corresponding to the ground
truth expanded to multiple annotations for evaluation with the Open Images
Challenge 2018 metric.
"""
# Row header is expected to be exactly:
# ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,
# IsTruncated,IsGroupOf,IsDepiction,IsInside
cvs_row_splited = csv_row.split(',')
assert len(cvs_row_splited) == 13
result = [csv_row]
assert cvs_row_splited[2] in self._hierarchy_keyed_child
parent_nodes = self._hierarchy_keyed_child[cvs_row_splited[2]]
for parent_node in parent_nodes:
cvs_row_splited[2] = parent_node
result.append(','.join(cvs_row_splited))
return result
def expand_labels_from_csv(self, csv_row):
"""Expands a row containing bounding boxes from CSV file.
Args:
csv_row: a single row of Open Images released groundtruth file.
Returns:
a list of strings (including the initial row) corresponding to the ground
truth expanded to multiple annotations for evaluation with the Open Images
Challenge 2018 metric.
"""
# Row header is expected to be exactly:
# ImageID,Source,LabelName,Confidence
cvs_row_splited = csv_row.split(',')
assert len(cvs_row_splited) == 4
result = [csv_row]
if int(cvs_row_splited[3]) == 1:
assert cvs_row_splited[2] in self._hierarchy_keyed_child
parent_nodes = self._hierarchy_keyed_child[cvs_row_splited[2]]
for parent_node in parent_nodes:
cvs_row_splited[2] = parent_node
result.append(','.join(cvs_row_splited))
else:
assert cvs_row_splited[2] in self._hierarchy_keyed_parent
child_nodes = self._hierarchy_keyed_parent[cvs_row_splited[2]]
for child_node in child_nodes:
cvs_row_splited[2] = child_node
result.append(','.join(cvs_row_splited))
return result
def main(argv):
if len(argv) < 4:
print """Missing arguments. \n
Usage: ./hierarchical_labels_expansion <path to JSON hierarchy>
<input csv file> <output csv file> [optional]labels_file"""
return
with open(argv[1]) as f:
hierarchy = json.load(f)
expansion_generator = OIDHierarchicalLabelsExpansion(hierarchy)
labels_file = False
if len(argv) > 4 and argv[4] == 'labels_file':
labels_file = True
with open(argv[2], 'r') as source:
with open(argv[3], 'w') as target:
header_skipped = False
for line in source:
if not header_skipped:
header_skipped = True
continue
if labels_file:
expanded_lines = expansion_generator.expand_labels_from_csv(line)
else:
expanded_lines = expansion_generator.expand_boxes_from_csv(line)
target.writelines(expanded_lines)
if __name__ == '__main__':
main(sys.argv)
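A hedged usage sketch of the expansion class, using the toy hierarchy from the test file that follows (where both 'c' and 'f' are parents of 'd'):

from object_detection.dataset_tools import oid_hierarchical_labels_expansion

hierarchy = {
    'LabelName': 'a',
    'Subcategory': [
        {'LabelName': 'b'},
        {'LabelName': 'c',
         'Subcategory': [{'LabelName': 'd'}, {'LabelName': 'e'}]},
        {'LabelName': 'f', 'Subcategory': [{'LabelName': 'd'}]},
    ]
}
expander = oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
    hierarchy)
# A positively verified label propagates to all of its ancestors:
print(expander.expand_labels_from_csv('124,verification,d,1'))
# -> ['124,verification,d,1', '124,verification,c,1', '124,verification,f,1']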
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the OpenImages label expansion (OIDHierarchicalLabelsExpansion)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.dataset_tools import oid_hierarchical_labels_expansion
def create_test_data():
hierarchy = {
'LabelName':
'a',
'Subcategory': [{
'LabelName': 'b'
}, {
'LabelName': 'c',
'Subcategory': [{
'LabelName': 'd'
}, {
'LabelName': 'e'
}]
}, {
'LabelName': 'f',
'Subcategory': [{
'LabelName': 'd'
},]
}]
}
bbox_rows = [
'123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0',
'123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0'
]
label_rows = [
'123,verification,b,0', '123,verification,c,0', '124,verification,d,1'
]
return hierarchy, bbox_rows, label_rows
class HierarchicalLabelsExpansionTest(tf.test.TestCase):
def test_bbox_expansion(self):
hierarchy, bbox_rows, _ = create_test_data()
expansion_generator = (
oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
hierarchy))
all_result_rows = []
for row in bbox_rows:
all_result_rows.extend(expansion_generator.expand_boxes_from_csv(row))
self.assertItemsEqual([
'123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0',
'123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
'123,xclick,f,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
'123,xclick,c,1,0.2,0.3,0.1,0.2,1,1,0,0,0'
], all_result_rows)
def test_labels_expansion(self):
hierarchy, _, label_rows = create_test_data()
expansion_generator = (
oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
hierarchy))
all_result_rows = []
for row in label_rows:
all_result_rows.extend(expansion_generator.expand_labels_from_csv(row))
self.assertItemsEqual([
'123,verification,b,0', '123,verification,c,0', '123,verification,d,0',
'123,verification,e,0', '124,verification,d,1', '124,verification,f,1',
'124,verification,c,1'
], all_result_rows)
if __name__ == '__main__':
tf.test.main()
......@@ -41,24 +41,31 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
filtered_data_frame = annotations_data_frame[
annotations_data_frame.LabelName.isin(label_map)]
filtered_data_frame_boxes = filtered_data_frame[
~filtered_data_frame.YMin.isnull()]
filtered_data_frame_labels = filtered_data_frame[
filtered_data_frame.YMin.isnull()]
image_id = annotations_data_frame.ImageID.iloc[0]
feature_map = {
standard_fields.TfExampleFields.object_bbox_ymin:
dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()),
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMin.as_matrix()),
standard_fields.TfExampleFields.object_bbox_xmin:
dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()),
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMin.as_matrix()),
standard_fields.TfExampleFields.object_bbox_ymax:
dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()),
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMax.as_matrix()),
standard_fields.TfExampleFields.object_bbox_xmax:
dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()),
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMax.as_matrix()),
standard_fields.TfExampleFields.object_class_text:
dataset_util.bytes_list_feature(
filtered_data_frame.LabelName.as_matrix()),
filtered_data_frame_boxes.LabelName.as_matrix()),
standard_fields.TfExampleFields.object_class_label:
dataset_util.int64_list_feature(
filtered_data_frame.LabelName.map(lambda x: label_map[x])
filtered_data_frame_boxes.LabelName.map(lambda x: label_map[x])
.as_matrix()),
standard_fields.TfExampleFields.filename:
dataset_util.bytes_feature('{}.jpg'.format(image_id)),
......@@ -71,43 +78,29 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
if 'IsGroupOf' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_group_of] = dataset_util.int64_list_feature(
filtered_data_frame.IsGroupOf.as_matrix().astype(int))
filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int))
if 'IsOccluded' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_occluded] = dataset_util.int64_list_feature(
filtered_data_frame.IsOccluded.as_matrix().astype(int))
filtered_data_frame_boxes.IsOccluded.as_matrix().astype(
int))
if 'IsTruncated' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_truncated] = dataset_util.int64_list_feature(
filtered_data_frame.IsTruncated.as_matrix().astype(int))
filtered_data_frame_boxes.IsTruncated.as_matrix().astype(
int))
if 'IsDepiction' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_depiction] = dataset_util.int64_list_feature(
filtered_data_frame.IsDepiction.as_matrix().astype(int))
filtered_data_frame_boxes.IsDepiction.as_matrix().astype(
int))
if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
feature_map[standard_fields.TfExampleFields.
image_class_label] = dataset_util.int64_list_feature(
filtered_data_frame_labels.LabelName.map(
lambda x: label_map[x]).as_matrix())
feature_map[standard_fields.TfExampleFields.
image_class_text] = dataset_util.bytes_list_feature(
filtered_data_frame_labels.LabelName.as_matrix())
return tf.train.Example(features=tf.train.Features(feature=feature_map))
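A hedged sketch of the split performed above: rows with box coordinates keep YMin populated, while image-level label rows leave it null and carry the renamed ConfidenceImageLabel column (values are illustrative, patterned on the test data below):

import pandas as pd

df = pd.DataFrame({
    'ImageID': ['i1', 'i1'],
    'LabelName': ['a', 'c'],
    'YMin': [0.3, None],
    'ConfidenceImageLabel': [None, 0],
})
boxes = df[~df.YMin.isnull()]   # the 'a' row: a box annotation
labels = df[df.YMin.isnull()]   # the 'c' row: an image-level label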
def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
"""Opens all TFRecord shards for writing and adds them to an exit stack.
Args:
exit_stack: A contextlib2.ExitStack used to automatically close the TFRecords
opened in this function.
base_path: The base path for all shards.
num_shards: The number of shards.
Returns:
The list of opened TFRecords. Position k in the list corresponds to shard k.
"""
tf_record_output_filenames = [
'{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
for idx in range(num_shards)
]
tfrecords = [
exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
for file_name in tf_record_output_filenames
]
return tfrecords
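A hedged usage sketch of the sharded-writer helper (which this PR moves to tf_record_creation_util); the base path and payloads are illustrative, patterned on the test being removed below:

import contextlib2
import tensorflow as tf
from object_detection.dataset_tools import tf_record_creation_util

# Opens /tmp/test.tfrec-00000-of-00010 ... -00009-of-00010; the ExitStack
# closes every shard when the block exits.
with contextlib2.ExitStack() as tf_record_close_stack:
  output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
      tf_record_close_stack, '/tmp/test.tfrec', 10)
  for idx in range(10):
    output_tfrecords[idx].write('record_{}'.format(idx))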
......@@ -14,8 +14,6 @@
# ==============================================================================
"""Tests for oid_tfrecord_creation.py."""
import os
import contextlib2
import pandas as pd
import tensorflow as tf
......@@ -24,16 +22,17 @@ from object_detection.dataset_tools import oid_tfrecord_creation
def create_test_data():
data = {
'ImageID': ['i1', 'i1', 'i1', 'i1', 'i2', 'i2'],
'LabelName': ['a', 'a', 'b', 'b', 'b', 'c'],
'YMin': [0.3, 0.6, 0.8, 0.1, 0.0, 0.0],
'XMin': [0.1, 0.3, 0.7, 0.0, 0.1, 0.1],
'XMax': [0.2, 0.3, 0.8, 0.5, 0.9, 0.9],
'YMax': [0.3, 0.6, 1, 0.8, 0.8, 0.8],
'IsOccluded': [0, 1, 1, 0, 0, 0],
'IsTruncated': [0, 0, 0, 1, 0, 0],
'IsGroupOf': [0, 0, 0, 0, 0, 1],
'IsDepiction': [1, 0, 0, 0, 0, 0],
'ImageID': ['i1', 'i1', 'i1', 'i1', 'i1', 'i2', 'i2'],
'LabelName': ['a', 'a', 'b', 'b', 'c', 'b', 'c'],
'YMin': [0.3, 0.6, 0.8, 0.1, None, 0.0, 0.0],
'XMin': [0.1, 0.3, 0.7, 0.0, None, 0.1, 0.1],
'XMax': [0.2, 0.3, 0.8, 0.5, None, 0.9, 0.9],
'YMax': [0.3, 0.6, 1, 0.8, None, 0.8, 0.8],
'IsOccluded': [0, 1, 1, 0, None, 0, 0],
'IsTruncated': [0, 0, 0, 1, None, 0, 0],
'IsGroupOf': [0, 0, 0, 0, None, 0, 1],
'IsDepiction': [1, 0, 0, 0, None, 0, 0],
'ConfidenceImageLabel': [None, None, None, None, 0, None, None],
}
df = pd.DataFrame(data=data)
label_map = {'a': 0, 'b': 1, 'c': 2}
......@@ -47,7 +46,8 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
self.assertProtoEquals("""
self.assertProtoEquals(
"""
features {
feature {
key: "image/encoded"
......@@ -87,7 +87,13 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
value { int64_list { value: [0, 1, 1, 0] } } }
feature {
key: "image/object/truncated"
value { int64_list { value: [0, 0, 0, 1] } } } }
value { int64_list { value: [0, 0, 0, 1] } } }
feature {
key: "image/class/label"
value { int64_list { value: [2] } } }
feature {
key: "image/class/text"
value { bytes_list { value: ["c"] } } } }
""", tf_example)
def test_no_attributes(self):
......@@ -97,6 +103,7 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
del df['IsGroupOf']
del df['IsOccluded']
del df['IsTruncated']
del df['ConfidenceImageLabel']
tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
df[df.ImageID == 'i2'], label_map, 'encoded_image_test')
......@@ -138,7 +145,8 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
self.assertProtoEquals("""
self.assertProtoEquals(
"""
features {
feature {
key: "image/encoded"
......@@ -178,26 +186,15 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
value { int64_list { value: [0, 1] } } }
feature {
key: "image/object/truncated"
value { int64_list { value: [0, 0] } } } }
value { int64_list { value: [0, 0] } } }
feature {
key: "image/class/label"
value { int64_list { } } }
feature {
key: "image/class/text"
value { bytes_list { } } } }
""", tf_example)
class OpenOutputTfrecordsTests(tf.test.TestCase):
def test_sharded_tfrecord_writes(self):
with contextlib2.ExitStack() as tf_record_close_stack:
output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
tf_record_close_stack,
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10)
for idx in range(10):
output_tfrecords[idx].write('test_{}'.format(idx))
for idx in range(10):
tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path))
self.assertAllEqual(records, ['test_{}'.format(idx)])
if __name__ == '__main__':
tf.test.main()