Commit 27b4acd4 authored by Aman Gupta

Merge remote-tracking branch 'upstream/master'

parents 5133522f d4e1f97f
......@@ -93,8 +93,7 @@ class TargetAssigner(object):
groundtruth_boxes,
groundtruth_labels=None,
unmatched_class_label=None,
groundtruth_weights=None,
**params):
groundtruth_weights=None):
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
......@@ -121,9 +120,11 @@ class TargetAssigner(object):
If set to None, unmatched_cls_target is set to be [0] for each anchor.
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors that match a particular groundtruth box. The weights
must be in [0., 1.]. If None, all weights are set to 1.
**params: Additional keyword arguments for specific implementations of
the Matcher.
must be in [0., 1.]. If None, all weights are set to 1. In general, no
groundtruth boxes with zero weight match any anchors, since matchers are
aware of groundtruth weights. Additionally, `cls_weights` and
`reg_weights` are calculated from groundtruth weights as an added
safety.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
......@@ -177,7 +178,8 @@ class TargetAssigner(object):
[unmatched_shape_assert, labels_and_box_shapes_assert]):
match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
anchors)
match = self._matcher.match(match_quality_matrix, **params)
match = self._matcher.match(match_quality_matrix,
valid_rows=tf.greater(groundtruth_weights, 0))
reg_targets = self._create_regression_targets(anchors,
groundtruth_boxes,
match)
......
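Note on the change above: the `**params` escape hatch (and the tests' explicit `num_valid_rows=3` below) is replaced by a validity mask derived from the weights themselves. A minimal sketch of the new contract, with toy values:

```python
import tensorflow as tf

# Toy weights: the second groundtruth box has zero weight, so it should
# never match any anchor.
groundtruth_weights = tf.constant([1.0, 0.0, 0.5])

# This is the boolean mask TargetAssigner.assign now hands to
# Matcher.match via valid_rows, replacing the old **params plumbing.
valid_rows = tf.greater(groundtruth_weights, 0)

with tf.Session() as sess:
  print(sess.run(valid_rows))  # [ True False  True]
```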
......@@ -495,8 +495,7 @@ class TargetAssignerTest(test_case.TestCase):
priors,
boxes,
groundtruth_labels,
unmatched_class_label=unmatched_class_label,
num_valid_rows=3)
unmatched_class_label=unmatched_class_label)
def test_raises_error_on_invalid_groundtruth_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
......@@ -520,8 +519,7 @@ class TargetAssignerTest(test_case.TestCase):
priors,
boxes,
groundtruth_labels,
unmatched_class_label=unmatched_class_label,
num_valid_rows=3)
unmatched_class_label=unmatched_class_label)
class BatchTargetAssignerTest(test_case.TestCase):
......
......@@ -19,9 +19,6 @@ protos for object detection.
"""
import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
......@@ -30,14 +27,12 @@ from object_detection.utils import label_map_util
slim_example_decoder = tf.contrib.slim.tfexample_decoder
# TODO(lzc): keep LookupTensor and BackupHandler in sync with
# tf.contrib.slim.tfexample_decoder version.
class LookupTensor(slim_example_decoder.Tensor):
"""An ItemHandler that returns a parsed Tensor, the result of a lookup."""
class _ClassTensorHandler(slim_example_decoder.Tensor):
"""An ItemHandler to fetch class ids from class text."""
def __init__(self,
tensor_key,
table,
label_map_proto_file,
shape_keys=None,
shape=None,
default_value=''):
......@@ -47,7 +42,8 @@ class LookupTensor(slim_example_decoder.Tensor):
Args:
tensor_key: the name of the `TFExample` feature to read the tensor from.
table: A tf.lookup table.
label_map_proto_file: File path to a text format LabelMapProto message
mapping class text to id.
shape_keys: Optional name or list of names of the TF-Example feature in
which the tensor shape is stored. If a list, then each corresponds to
one dimension of the shape.
......@@ -59,16 +55,39 @@ class LookupTensor(slim_example_decoder.Tensor):
Raises:
ValueError: if both `shape_keys` and `shape` are specified.
"""
self._table = table
super(LookupTensor, self).__init__(tensor_key, shape_keys, shape,
default_value)
name_to_id = label_map_util.get_label_map_dict(
label_map_proto_file, use_display_name=False)
# We use a default_value of -1, but we expect all labels to be contained
# in the label map.
name_to_id_table = tf.contrib.lookup.HashTable(
initializer=tf.contrib.lookup.KeyValueTensorInitializer(
keys=tf.constant(list(name_to_id.keys())),
values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
default_value=-1)
display_name_to_id = label_map_util.get_label_map_dict(
label_map_proto_file, use_display_name=True)
# We use a default_value of -1, but we expect all labels to be contained
# in the label map.
display_name_to_id_table = tf.contrib.lookup.HashTable(
initializer=tf.contrib.lookup.KeyValueTensorInitializer(
keys=tf.constant(list(display_name_to_id.keys())),
values=tf.constant(
list(display_name_to_id.values()), dtype=tf.int64)),
default_value=-1)
self._name_to_id_table = name_to_id_table
self._display_name_to_id_table = display_name_to_id_table
super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
default_value)
def tensors_to_item(self, keys_to_tensors):
unmapped_tensor = super(LookupTensor, self).tensors_to_item(keys_to_tensors)
return self._table.lookup(unmapped_tensor)
unmapped_tensor = super(_ClassTensorHandler,
self).tensors_to_item(keys_to_tensors)
return tf.maximum(self._name_to_id_table.lookup(unmapped_tensor),
self._display_name_to_id_table.lookup(unmapped_tensor))
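The handler above consults two tables, one keyed by `name` and one by `display_name`, each returning -1 on a miss; `tf.maximum` then keeps whichever lookup succeeded. A small self-contained sketch of that trick (the keys below are stand-ins, not a real label map):

```python
import tensorflow as tf

def _table(keys, values):
  # A lookup table that returns -1 for unknown keys.
  return tf.contrib.lookup.HashTable(
      tf.contrib.lookup.KeyValueTensorInitializer(
          keys=tf.constant(keys),
          values=tf.constant(values, dtype=tf.int64)),
      default_value=-1)

name_to_id = _table(['/m/cat'], [3])
display_name_to_id = _table(['cat'], [3])

keys = tf.constant(['/m/cat', 'cat', 'unknown'])
# -1 loses to any valid id, so the max resolves from either table.
ids = tf.maximum(name_to_id.lookup(keys), display_name_to_id.lookup(keys))

with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  print(sess.run(ids))  # [ 3  3 -1]
```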
class BackupHandler(slim_example_decoder.ItemHandler):
class _BackupHandler(slim_example_decoder.ItemHandler):
"""An ItemHandler that tries two ItemHandlers in order."""
def __init__(self, handler, backup):
......@@ -92,12 +111,12 @@ class BackupHandler(slim_example_decoder.ItemHandler):
'Backup handler is of type %s instead of ItemHandler' % type(backup))
self._handler = handler
self._backup = backup
super(BackupHandler, self).__init__(handler.keys + backup.keys)
super(_BackupHandler, self).__init__(handler.keys + backup.keys)
def tensors_to_item(self, keys_to_tensors):
item = self._handler.tensors_to_item(keys_to_tensors)
return control_flow_ops.cond(
pred=math_ops.equal(math_ops.reduce_prod(array_ops.shape(item)), 0),
return tf.cond(
pred=tf.equal(tf.reduce_prod(tf.shape(item)), 0),
true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors),
false_fn=lambda: item)
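The move from `tensorflow.python.ops` internals to public `tf.cond`/`tf.equal`/`tf.reduce_prod` keeps the same fallback rule: the primary item loses only when it decodes to an empty tensor. A standalone sketch of that rule, with toy tensors in place of handlers:

```python
import tensorflow as tf

primary = tf.constant([], dtype=tf.int64)       # empty -> fall back
backup = tf.constant([42, 10], dtype=tf.int64)

# "Missing" means the shape has a zero dimension, i.e. the product of
# the shape entries is 0.
item = tf.cond(
    pred=tf.equal(tf.reduce_prod(tf.shape(primary)), 0),
    true_fn=lambda: backup,
    false_fn=lambda: primary)

with tf.Session() as sess:
  print(sess.run(item))  # [42 10]
```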
......@@ -140,6 +159,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
input_reader_pb2.PNG_MASKS.
"""
# TODO(rathodv): delete unused `use_display_name` argument once we change
# other decoders to handle label maps similarly.
del use_display_name
self.keys_to_features = {
'image/encoded':
tf.FixedLenFeature((), tf.string, default_value=''),
......@@ -267,27 +289,18 @@ class TfExampleDecoder(data_decoder.DataDecoder):
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
if label_map_proto_file:
label_map = label_map_util.get_label_map_dict(label_map_proto_file,
use_display_name)
# We use a default_value of -1, but we expect all labels to be contained
# in the label map.
table = tf.contrib.lookup.HashTable(
initializer=tf.contrib.lookup.KeyValueTensorInitializer(
keys=tf.constant(list(label_map.keys())),
values=tf.constant(list(label_map.values()), dtype=tf.int64)),
default_value=-1)
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
# TODO(lzc): note that here we are using BackupHandler defined in this
# file(which is branching slim_example_decoder.BackupHandler). Need to
# switch back to slim_example_decoder.BackupHandler once tf 1.5 becomes
# more popular.
label_handler = BackupHandler(
LookupTensor('image/object/class/text', table, default_value=''),
label_handler = _BackupHandler(
_ClassTensorHandler(
'image/object/class/text', label_map_proto_file,
default_value=''),
slim_example_decoder.Tensor('image/object/class/label'))
image_label_handler = BackupHandler(
LookupTensor(
fields.TfExampleFields.image_class_text, table, default_value=''),
image_label_handler = _BackupHandler(
_ClassTensorHandler(
fields.TfExampleFields.image_class_text,
label_map_proto_file,
default_value=''),
slim_example_decoder.Tensor(fields.TfExampleFields.image_class_label))
else:
label_handler = slim_example_decoder.Tensor('image/object/class/label')
......@@ -309,6 +322,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
A dictionary of the following tensors.
fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
containing image.
fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
shape [2] containing shape of the image.
fields.InputDataFields.source_id - string tensor containing original
image id.
fields.InputDataFields.key - string tensor with unique sha256 hash key.
......@@ -352,6 +367,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
is_crowd = fields.InputDataFields.groundtruth_is_crowd
tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
tensor_dict[fields.InputDataFields.image])[:2]
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
......
......@@ -12,24 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.data_decoders.tf_example_decoder."""
import os
import numpy as np
import tensorflow as tf
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import parsing_ops
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
from object_detection.protos import input_reader_pb2
from object_detection.utils import dataset_util
slim_example_decoder = tf.contrib.slim.tfexample_decoder
......@@ -56,25 +49,6 @@ class TfExampleDecoderTest(tf.test.TestCase):
raise ValueError('Invalid encoding type.')
return image_decoded
def _Int64Feature(self, value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def _FloatFeature(self, value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def _BytesFeature(self, value):
if isinstance(value, list):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _Int64FeatureFromList(self, ndarray):
return feature_pb2.Feature(
int64_list=feature_pb2.Int64List(value=ndarray.flatten().tolist()))
def _BytesFeatureFromList(self, ndarray):
values = ndarray.flatten().tolist()
return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=values))
def testDecodeAdditionalChannels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -88,14 +62,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
features=tf.train.Features(
feature={
'image/encoded':
self._BytesFeature(encoded_jpeg),
dataset_util.bytes_feature(encoded_jpeg),
'image/additional_channels/encoded':
self._BytesFeatureFromList(
np.array([encoded_additional_channel] * 2)),
dataset_util.bytes_list_feature(
[encoded_additional_channel] * 2),
'image/format':
self._BytesFeature('jpeg'),
dataset_util.bytes_feature('jpeg'),
'image/source_id':
self._BytesFeature('image_id'),
dataset_util.bytes_feature('image_id'),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
......@@ -108,118 +82,44 @@ class TfExampleDecoderTest(tf.test.TestCase):
np.concatenate([decoded_additional_channel] * 2, axis=2),
tensor_dict[fields.InputDataFields.image_additional_channels])
def testDecodeExampleWithBranchedBackupHandler(self):
example1 = example_pb2.Example(
features=feature_pb2.Features(
feature={
'image/object/class/text':
self._BytesFeatureFromList(
np.array(['cat', 'dog', 'guinea pig'])),
'image/object/class/label':
self._Int64FeatureFromList(np.array([42, 10, 900]))
}))
example2 = example_pb2.Example(
features=feature_pb2.Features(
feature={
'image/object/class/text':
self._BytesFeatureFromList(
np.array(['cat', 'dog', 'guinea pig'])),
}))
example3 = example_pb2.Example(
features=feature_pb2.Features(
feature={
'image/object/class/label':
self._Int64FeatureFromList(np.array([42, 10, 901]))
}))
# 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
table = lookup_ops.index_table_from_tensor(
constant_op.constant(['dog', 'guinea pig', 'cat']))
keys_to_features = {
'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
}
backup_handler = tf_example_decoder.BackupHandler(
handler=slim_example_decoder.Tensor('image/object/class/label'),
backup=tf_example_decoder.LookupTensor('image/object/class/text',
table))
items_to_handlers = {
'labels': backup_handler,
}
decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
items_to_handlers)
obtained_class_ids_each_example = []
with self.test_session() as sess:
sess.run(lookup_ops.tables_initializer())
for example in [example1, example2, example3]:
serialized_example = array_ops.reshape(
example.SerializeToString(), shape=[])
obtained_class_ids_each_example.append(
decoder.decode(serialized_example)[0].eval())
self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])
def testDecodeExampleWithBranchedLookup(self):
example = example_pb2.Example(features=feature_pb2.Features(feature={
'image/object/class/text': self._BytesFeatureFromList(
np.array(['cat', 'dog', 'guinea pig'])),
}))
serialized_example = example.SerializeToString()
# 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
table = lookup_ops.index_table_from_tensor(
constant_op.constant(['dog', 'guinea pig', 'cat']))
with self.test_session() as sess:
sess.run(lookup_ops.tables_initializer())
serialized_example = array_ops.reshape(serialized_example, shape=[])
keys_to_features = {
'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
}
items_to_handlers = {
'labels':
tf_example_decoder.LookupTensor('image/object/class/text', table),
}
decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
items_to_handlers)
obtained_class_ids = decoder.decode(serialized_example)[0].eval()
self.assertAllClose([2, 0, 1], obtained_class_ids)
def testDecodeJpegImage(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
decoded_jpeg = self._DecodeImage(encoded_jpeg)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/source_id': self._BytesFeature('image_id'),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
'image/format': dataset_util.bytes_feature('jpeg'),
'image/source_id': dataset_util.bytes_feature('image_id'),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
self.assertAllEqual((tensor_dict[fields.InputDataFields.
original_image_spatial_shape].
get_shape().as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
original_image_spatial_shape])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeImageKeyAndFilename(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/key/sha256': self._BytesFeature('abc'),
'image/filename': self._BytesFeature('filename')
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
'image/key/sha256': dataset_util.bytes_feature('abc'),
'image/filename': dataset_util.bytes_feature('filename')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
......@@ -234,21 +134,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_png),
'image/format': self._BytesFeature('png'),
'image/source_id': self._BytesFeature('image_id')
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': dataset_util.bytes_feature(encoded_png),
'image/format': dataset_util.bytes_feature('png'),
'image/source_id': dataset_util.bytes_feature('image_id')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
self.assertAllEqual((tensor_dict[fields.InputDataFields.
original_image_spatial_shape].
get_shape().as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
original_image_spatial_shape])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodePngInstanceMasks(self):
......@@ -265,9 +172,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/mask': self._BytesFeature(encoded_masks)
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/mask':
dataset_util.bytes_list_feature(encoded_masks)
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
......@@ -288,11 +198,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/mask': self._BytesFeature(encoded_masks),
'image/height': self._Int64Feature([10]),
'image/width': self._Int64Feature([10]),
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/mask':
dataset_util.bytes_list_feature(encoded_masks),
'image/height':
dataset_util.int64_feature(10),
'image/width':
dataset_util.int64_feature(10),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
......@@ -312,25 +227,33 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_xmins = [1.0, 5.0]
bbox_ymaxs = [2.0, 6.0]
bbox_xmaxs = [3.0, 7.0]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/bbox/ymin':
dataset_util.float_list_feature(bbox_ymins),
'image/object/bbox/xmin':
dataset_util.float_list_feature(bbox_xmins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(bbox_ymaxs),
'image/object/bbox/xmax':
dataset_util.float_list_feature(bbox_xmaxs),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
get_shape().as_list()), [None, 4])
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
.get_shape().as_list()), [None, 4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
bbox_ymaxs, bbox_xmaxs]).transpose()
expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes])
self.assertAllEqual(
......@@ -346,30 +269,40 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_xmaxs = [3.0, 7.0]
keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
'image/object/keypoint/y': self._FloatFeature(keypoint_ys),
'image/object/keypoint/x': self._FloatFeature(keypoint_xs),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/bbox/ymin':
dataset_util.float_list_feature(bbox_ymins),
'image/object/bbox/xmin':
dataset_util.float_list_feature(bbox_xmins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(bbox_ymaxs),
'image/object/bbox/xmax':
dataset_util.float_list_feature(bbox_xmaxs),
'image/object/keypoint/y':
dataset_util.float_list_feature(keypoint_ys),
'image/object/keypoint/x':
dataset_util.float_list_feature(keypoint_xs),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
get_shape().as_list()), [None, 4])
self.assertAllEqual((tensor_dict[fields.InputDataFields.
groundtruth_keypoints].
get_shape().as_list()), [2, 3, 2])
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
.get_shape().as_list()), [None, 4])
self.assertAllEqual(
(tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape()
.as_list()), [2, 3, 2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
bbox_ymaxs, bbox_xmaxs]).transpose()
expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes])
self.assertAllEqual(
......@@ -377,9 +310,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
expected_keypoints = (
np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
self.assertAllEqual(expected_keypoints,
tensor_dict[
fields.InputDataFields.groundtruth_keypoints])
self.assertAllEqual(
expected_keypoints,
tensor_dict[fields.InputDataFields.groundtruth_keypoints])
def testDecodeDefaultGroundtruthWeights(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
......@@ -388,20 +321,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_xmins = [1.0, 5.0]
bbox_ymaxs = [2.0, 6.0]
bbox_xmaxs = [3.0, 7.0]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/bbox/ymin':
dataset_util.float_list_feature(bbox_ymins),
'image/object/bbox/xmin':
dataset_util.float_list_feature(bbox_xmins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(bbox_ymaxs),
'image/object/bbox/xmax':
dataset_util.float_list_feature(bbox_xmaxs),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
get_shape().as_list()), [None, 4])
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
.get_shape().as_list()), [None, 4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -414,18 +355,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/class/label': self._Int64Feature(bbox_classes),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/class/label':
dataset_util.int64_list_feature(bbox_classes),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
[2])
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
.get_shape().as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -437,11 +382,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes = [1, 2]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/class/label': self._Int64Feature(bbox_classes),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/class/label':
dataset_util.int64_list_feature(bbox_classes),
})).SerializeToString()
label_map_string = """
item {
id:1
......@@ -460,9 +410,8 @@ class TfExampleDecoderTest(tf.test.TestCase):
label_map_proto_file=label_map_path)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
[None])
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
.get_shape().as_list()), [None])
init = tf.tables_initializer()
with self.test_session() as sess:
......@@ -480,11 +429,11 @@ class TfExampleDecoderTest(tf.test.TestCase):
features=tf.train.Features(
feature={
'image/encoded':
self._BytesFeature(encoded_jpeg),
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
self._BytesFeature('jpeg'),
dataset_util.bytes_feature('jpeg'),
'image/object/class/text':
self._BytesFeature(bbox_classes_text),
dataset_util.bytes_list_feature(bbox_classes_text),
})).SerializeToString()
label_map_string = """
......@@ -514,7 +463,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual([2, -1],
tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectLabelWithMapping(self):
def testDecodeObjectLabelWithMappingWithDisplayName(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes_text = ['cat', 'dog']
......@@ -522,11 +471,53 @@ class TfExampleDecoderTest(tf.test.TestCase):
features=tf.train.Features(
feature={
'image/encoded':
self._BytesFeature(encoded_jpeg),
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
self._BytesFeature('jpeg'),
dataset_util.bytes_feature('jpeg'),
'image/object/class/text':
self._BytesFeature(bbox_classes_text),
dataset_util.bytes_list_feature(bbox_classes_text),
})).SerializeToString()
label_map_string = """
item {
id:3
display_name:'cat'
}
item {
id:1
display_name:'dog'
}
"""
label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
with tf.gfile.Open(label_map_path, 'wb') as f:
f.write(label_map_string)
example_decoder = tf_example_decoder.TfExampleDecoder(
label_map_proto_file=label_map_path)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
.get_shape().as_list()), [None])
with self.test_session() as sess:
sess.run(tf.tables_initializer())
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([3, 1],
tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectLabelWithMappingWithName(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes_text = ['cat', 'dog']
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/class/text':
dataset_util.bytes_list_feature(bbox_classes_text),
})).SerializeToString()
label_map_string = """
......@@ -561,17 +552,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_area = [100., 174.]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/area': self._FloatFeature(object_area),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/area':
dataset_util.float_list_feature(object_area),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
get_shape().as_list()), [2])
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area]
.get_shape().as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -583,67 +579,81 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_is_crowd = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/is_crowd': self._Int64Feature(object_is_crowd),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/is_crowd':
dataset_util.int64_list_feature(object_is_crowd),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
[2])
self.assertAllEqual(
(tensor_dict[fields.InputDataFields.groundtruth_is_crowd].get_shape()
.as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_is_crowd],
tensor_dict[
fields.InputDataFields.groundtruth_is_crowd])
self.assertAllEqual(
[bool(item) for item in object_is_crowd],
tensor_dict[fields.InputDataFields.groundtruth_is_crowd])
@test_util.enable_c_shapes
def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_difficult = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/difficult': self._Int64Feature(object_difficult),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/difficult':
dataset_util.int64_list_feature(object_difficult),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
[2])
self.assertAllEqual(
(tensor_dict[fields.InputDataFields.groundtruth_difficult].get_shape()
.as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_difficult],
tensor_dict[
fields.InputDataFields.groundtruth_difficult])
self.assertAllEqual(
[bool(item) for item in object_difficult],
tensor_dict[fields.InputDataFields.groundtruth_difficult])
@test_util.enable_c_shapes
def testDecodeObjectGroupOf(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_group_of = [0, 1]
example = tf.train.Example(features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/group_of': self._Int64Feature(object_group_of),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/group_of':
dataset_util.int64_list_feature(object_group_of),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_group_of].get_shape().as_list()),
[2])
self.assertAllEqual(
(tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape()
.as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -655,25 +665,27 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_weights = [0.75, 1.0]
example = tf.train.Example(features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/weight': self._FloatFeature(object_weights),
})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/object/weight':
dataset_util.float_list_feature(object_weights),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_weights].get_shape().as_list()),
[None])
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_weights]
.get_shape().as_list()), [None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
object_weights,
tensor_dict[fields.InputDataFields.groundtruth_weights])
self.assertAllEqual(object_weights,
tensor_dict[fields.InputDataFields.groundtruth_weights])
@test_util.enable_c_shapes
def testDecodeInstanceSegmentation(self):
......@@ -682,15 +694,13 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_width = 3
# Randomly generate image.
image_tensor = np.random.randint(256, size=(image_height,
image_width,
3)).astype(np.uint8)
image_tensor = np.random.randint(
256, size=(image_height, image_width, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
# Randomly generate instance segmentation masks.
instance_masks = (
np.random.randint(2, size=(num_instances,
image_height,
np.random.randint(2, size=(num_instances, image_height,
image_width)).astype(np.float32))
instance_masks_flattened = np.reshape(instance_masks, [-1])
......@@ -698,25 +708,32 @@ class TfExampleDecoderTest(tf.test.TestCase):
object_classes = np.random.randint(
100, size=(num_instances)).astype(np.int64)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/height': self._Int64Feature([image_height]),
'image/width': self._Int64Feature([image_width]),
'image/object/mask': self._FloatFeature(instance_masks_flattened),
'image/object/class/label': self._Int64Feature(
object_classes)})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/height':
dataset_util.int64_feature(image_height),
'image/width':
dataset_util.int64_feature(image_width),
'image/object/mask':
dataset_util.float_list_feature(instance_masks_flattened),
'image/object/class/label':
dataset_util.int64_list_feature(object_classes)
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=True)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
get_shape().as_list()), [4, 5, 3])
self.assertAllEqual(
(tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
.get_shape().as_list()), [4, 5, 3])
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_classes].
get_shape().as_list()), [4])
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
.get_shape().as_list()), [4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -724,24 +741,21 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual(
instance_masks.astype(np.float32),
tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
self.assertAllEqual(
object_classes,
tensor_dict[fields.InputDataFields.groundtruth_classes])
self.assertAllEqual(object_classes,
tensor_dict[fields.InputDataFields.groundtruth_classes])
def testInstancesNotAvailableByDefault(self):
num_instances = 4
image_height = 5
image_width = 3
# Randomly generate image.
image_tensor = np.random.randint(256, size=(image_height,
image_width,
3)).astype(np.uint8)
image_tensor = np.random.randint(
256, size=(image_height, image_width, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
# Randomly generate instance segmentation masks.
instance_masks = (
np.random.randint(2, size=(num_instances,
image_height,
np.random.randint(2, size=(num_instances, image_height,
image_width)).astype(np.float32))
instance_masks_flattened = np.reshape(instance_masks, [-1])
......@@ -749,18 +763,26 @@ class TfExampleDecoderTest(tf.test.TestCase):
object_classes = np.random.randint(
100, size=(num_instances)).astype(np.int64)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/height': self._Int64Feature([image_height]),
'image/width': self._Int64Feature([image_width]),
'image/object/mask': self._FloatFeature(instance_masks_flattened),
'image/object/class/label': self._Int64Feature(
object_classes)})).SerializeToString()
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/height':
dataset_util.int64_feature(image_height),
'image/width':
dataset_util.int64_feature(image_width),
'image/object/mask':
dataset_util.float_list_feature(instance_masks_flattened),
'image/object/class/label':
dataset_util.int64_list_feature(object_classes)
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertTrue(fields.InputDataFields.groundtruth_instance_masks
not in tensor_dict)
self.assertTrue(
fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
def testDecodeImageLabels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
......@@ -768,9 +790,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/class/label': self._Int64Feature([1, 2]),
'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
'image/format': dataset_util.bytes_feature('jpeg'),
'image/class/label': dataset_util.int64_list_feature([1, 2]),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
......@@ -784,9 +806,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/class/text': self._BytesFeature(['dog', 'cat']),
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature('jpeg'),
'image/class/text':
dataset_util.bytes_list_feature(['dog', 'cat']),
})).SerializeToString()
label_map_string = """
item {
......
......@@ -177,8 +177,8 @@ def create_tf_example(image,
dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax':
dataset_util.float_list_feature(ymax),
'image/object/class/label':
dataset_util.int64_list_feature(category_ids),
'image/object/class/text':
dataset_util.bytes_list_feature(category_names),
'image/object/is_crowd':
dataset_util.int64_list_feature(is_crowd),
'image/object/area':
......
......@@ -106,6 +106,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
['cat'])
def test_create_tf_example_with_instance_masks(self):
image_file_name = 'tmp_image.jpg'
......@@ -169,6 +172,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[1])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
['dog'])
encoded_mask_pngs = [
io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
'image/object/mask'].bytes_list.value
......
......@@ -14,7 +14,6 @@
# ==============================================================================
"""Common utility functions for evaluation."""
import collections
import logging
import os
import time
......@@ -53,15 +52,15 @@ def write_metrics(metrics, global_step, summary_dir):
global_step: Global step at which the metrics are computed.
summary_dir: Directory to write tensorflow summaries to.
"""
logging.info('Writing metrics to tf summary.')
tf.logging.info('Writing metrics to tf summary.')
summary_writer = tf.summary.FileWriterCache.get(summary_dir)
for key in sorted(metrics):
summary = tf.Summary(value=[
tf.Summary.Value(tag=key, simple_value=metrics[key]),
])
summary_writer.add_summary(summary, global_step)
logging.info('%s: %f', key, metrics[key])
logging.info('Metrics written to tf summary.')
tf.logging.info('%s: %f', key, metrics[key])
tf.logging.info('Metrics written to tf summary.')
# TODO(rathodv): Add tests.
......@@ -141,7 +140,7 @@ def visualize_detection_results(result_dict,
if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
raise ValueError('If show_groundtruth is enabled, result_dict must contain '
'groundtruth_boxes.')
logging.info('Creating detection visualizations.')
tf.logging.info('Creating detection visualizations.')
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict[input_fields.original_image], axis=0)
......@@ -205,7 +204,8 @@ def visualize_detection_results(result_dict,
summary_writer = tf.summary.FileWriterCache.get(summary_dir)
summary_writer.add_summary(summary, global_step)
logging.info('Detection visualizations written to summary with tag %s.', tag)
tf.logging.info('Detection visualizations written to summary with tag %s.',
tag)
def _run_checkpoint_once(tensor_dict,
......@@ -218,7 +218,8 @@ def _run_checkpoint_once(tensor_dict,
master='',
save_graph=False,
save_graph_dir='',
losses_dict=None):
losses_dict=None,
eval_export_path=None):
"""Evaluates metrics defined in evaluators and returns summaries.
This function loads the latest checkpoint in checkpoint_dirs and evaluates
......@@ -258,6 +259,8 @@ def _run_checkpoint_once(tensor_dict,
save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
is True this must be non-empty.
losses_dict: optional dictionary of scalar detection losses.
eval_export_path: Path for saving a JSON file that contains the detection
results.
Returns:
global_step: the count of global steps.
......@@ -292,7 +295,8 @@ def _run_checkpoint_once(tensor_dict,
try:
for batch in range(int(num_batches)):
if (batch + 1) % 100 == 0:
logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
tf.logging.info('Running eval ops batch %d/%d', batch + 1,
num_batches)
if not batch_processor:
try:
if not losses_dict:
......@@ -301,7 +305,7 @@ def _run_checkpoint_once(tensor_dict,
losses_dict])
counters['success'] += 1
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
tf.logging.info('Skipping image')
counters['skipped'] += 1
result_dict = {}
else:
......@@ -316,18 +320,31 @@ def _run_checkpoint_once(tensor_dict,
# decoders to return correct image_id.
# TODO(akuznetsa): result_dict contains batches of images, while
# add_single_ground_truth_image_info expects a single image. Fix
if (isinstance(result_dict, dict) and
result_dict.get(fields.InputDataFields.key)):
image_id = result_dict[fields.InputDataFields.key]
else:
image_id = batch
evaluator.add_single_ground_truth_image_info(
image_id=batch, groundtruth_dict=result_dict)
image_id=image_id, groundtruth_dict=result_dict)
evaluator.add_single_detected_image_info(
image_id=batch, detections_dict=result_dict)
logging.info('Running eval batches done.')
image_id=image_id, detections_dict=result_dict)
tf.logging.info('Running eval batches done.')
except tf.errors.OutOfRangeError:
logging.info('Done evaluating -- epoch limit reached')
tf.logging.info('Done evaluating -- epoch limit reached')
finally:
# When done, ask the threads to stop.
logging.info('# success: %d', counters['success'])
logging.info('# skipped: %d', counters['skipped'])
tf.logging.info('# success: %d', counters['success'])
tf.logging.info('# skipped: %d', counters['skipped'])
all_evaluator_metrics = {}
if eval_export_path:
for evaluator in evaluators:
if isinstance(evaluator, (coco_evaluation.CocoDetectionEvaluator,
coco_evaluation.CocoMaskEvaluator)):
tf.logging.info('Started dumping to json file.')
evaluator.dump_detections_to_json_file(
json_output_path=eval_export_path)
tf.logging.info('Finished dumping to json file.')
for evaluator in evaluators:
metrics = evaluator.evaluate()
evaluator.clear()
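The new `eval_export_path` flows straight to the COCO evaluators' `dump_detections_to_json_file`. A hedged sketch of the call it enables; the categories and output path here are illustrative, not from this commit:

```python
from object_detection.metrics import coco_evaluation

# Illustrative categories; real ones come from the label map.
categories = [{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'}]
evaluator = coco_evaluation.CocoDetectionEvaluator(categories)

# ... per-image add_single_ground_truth_image_info(...) and
# add_single_detected_image_info(...) calls happen in the eval loop ...

# With eval_export_path set, _run_checkpoint_once finishes the run with:
evaluator.dump_detections_to_json_file(
    json_output_path='/tmp/detections.json')
```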
......@@ -356,7 +373,8 @@ def repeated_checkpoint_run(tensor_dict,
master='',
save_graph=False,
save_graph_dir='',
losses_dict=None):
losses_dict=None,
eval_export_path=None):
"""Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.
This function repeatedly loads a checkpoint and evaluates a desired
......@@ -397,6 +415,8 @@ def repeated_checkpoint_run(tensor_dict,
save_graph_dir: where to save on disk the Tensorflow graph. If store_graph
is True this must be non-empty.
losses_dict: optional dictionary of scalar detection losses.
eval_export_path: Path for saving a JSON file that contains the detection
results.
Returns:
metrics: A dictionary containing metric names and values in the latest
......@@ -417,31 +437,36 @@ def repeated_checkpoint_run(tensor_dict,
number_of_evaluations = 0
while True:
start = time.time()
logging.info('Starting evaluation at ' + time.strftime(
tf.logging.info('Starting evaluation at ' + time.strftime(
'%Y-%m-%d-%H:%M:%S', time.gmtime()))
model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
if not model_path:
logging.info('No model found in %s. Will try again in %d seconds',
checkpoint_dirs[0], eval_interval_secs)
tf.logging.info('No model found in %s. Will try again in %d seconds',
checkpoint_dirs[0], eval_interval_secs)
elif model_path == last_evaluated_model_path:
logging.info('Found already evaluated checkpoint. Will try again in %d '
'seconds', eval_interval_secs)
tf.logging.info('Found already evaluated checkpoint. Will try again in '
'%d seconds', eval_interval_secs)
else:
last_evaluated_model_path = model_path
global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators,
batch_processor,
checkpoint_dirs,
variables_to_restore,
restore_fn, num_batches,
master, save_graph,
save_graph_dir,
losses_dict=losses_dict)
global_step, metrics = _run_checkpoint_once(
tensor_dict,
evaluators,
batch_processor,
checkpoint_dirs,
variables_to_restore,
restore_fn,
num_batches,
master,
save_graph,
save_graph_dir,
losses_dict=losses_dict,
eval_export_path=eval_export_path)
write_metrics(metrics, global_step, summary_dir)
number_of_evaluations += 1
if (max_number_of_evaluations and
number_of_evaluations >= max_number_of_evaluations):
logging.info('Finished evaluation!')
tf.logging.info('Finished evaluation!')
break
time_to_next_eval = start + eval_interval_secs - time.time()
if time_to_next_eval > 0:
......@@ -680,4 +705,3 @@ def evaluator_options_from_eval_config(eval_config):
eval_config.include_metrics_per_category)
}
return evaluator_options
......@@ -2,13 +2,12 @@
We provide a collection of detection models pre-trained on the [COCO
dataset](http://mscoco.org), the [Kitti dataset](http://www.cvlibs.net/datasets/kitti/),
the [Open Images dataset](https://github.com/openimages/dataset) and the
[AVA v2.1 dataset](https://research.google.com/ava/). These models can
be useful for
out-of-the-box inference if you are interested in categories already in COCO
(e.g., humans, cars, etc) or in Open Images (e.g.,
surfboard, jacuzzi, etc). They are also useful for initializing your models when
training on novel datasets.
the [Open Images dataset](https://github.com/openimages/dataset), the
[AVA v2.1 dataset](https://research.google.com/ava/) and the
[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
These models can be useful for out-of-the-box inference if you are interested in
categories already in those datasets. They are also useful for initializing your
models when training on novel datasets.
In the table below, we list each such pre-trained model including:
......@@ -113,6 +112,13 @@ Model name
[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes
## iNaturalist Species-trained models
Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---: | :-------------: | :-----:
[faster_rcnn_resnet101_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_fgvc_2018_07_19.tar.gz) | 395 | 58 | Boxes
[faster_rcnn_resnet50_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_fgvc_2018_07_19.tar.gz) | 366 | 55 | Boxes
## AVA v2.1 trained models
......
......@@ -37,12 +37,12 @@ A local training job can be run with the following command:
PIPELINE_CONFIG_PATH={path to pipeline config file}
MODEL_DIR={path to model directory}
NUM_TRAIN_STEPS=50000
NUM_EVAL_STEPS=2000
SAMPLE_1_OF_N_EVAL_EXAMPLES=1
python object_detection/model_main.py \
--pipeline_config_path=${PIPELINE_CONFIG_PATH} \
--model_dir=${MODEL_DIR} \
--num_train_steps=${NUM_TRAIN_STEPS} \
--num_eval_steps=${NUM_EVAL_STEPS} \
--sample_1_of_n_eval_examples=$SAMPLE_1_OF_N_EVAL_EXAMPLES \
--alsologtostderr
```
......
......@@ -216,7 +216,7 @@ To start training and evaluation, execute the following command from the
```bash
# From tensorflow/models/research/
gcloud ml-engine jobs submit training `whoami`_object_detection_pets_`date +%m_%d_%Y_%H_%M_%S` \
--runtime-version 1.9 \
--runtime-version 1.8 \
--job-dir=gs://${YOUR_GCS_BUCKET}/model_dir \
--packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz,/tmp/pycocotools/pycocotools-2.0.tar.gz \
--module-name object_detection.model_main \
......
......@@ -52,7 +52,8 @@ def transform_input_data(tensor_dict,
num_classes,
data_augmentation_fn=None,
merge_multiple_boxes=False,
retain_original_image=False):
retain_original_image=False,
use_bfloat16=False):
"""A single function that is responsible for all input data transformations.
Data transformation functions are applied in the following order.
......@@ -86,6 +87,7 @@ def transform_input_data(tensor_dict,
and classes for a given image if the boxes are exactly the same.
retain_original_image: (optional) whether to retain original image in the
output dictionary.
use_bfloat16: (optional) a bool, whether to use bfloat16 in training.
Returns:
A dictionary keyed by fields.InputDataFields containing the tensors obtained
......@@ -101,7 +103,8 @@ def transform_input_data(tensor_dict,
if retain_original_image:
tensor_dict[fields.InputDataFields.original_image] = tf.cast(
tensor_dict[fields.InputDataFields.image], tf.uint8)
image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
tf.uint8)
# Apply data augmentation ops.
if data_augmentation_fn is not None:
......@@ -111,6 +114,9 @@ def transform_input_data(tensor_dict,
image = tensor_dict[fields.InputDataFields.image]
preprocessed_resized_image, true_image_shape = model_preprocess_fn(
tf.expand_dims(tf.to_float(image), axis=0))
if use_bfloat16:
preprocessed_resized_image = tf.cast(
preprocessed_resized_image, tf.bfloat16)
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
preprocessed_resized_image, axis=0)
tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
......@@ -128,13 +134,33 @@ def transform_input_data(tensor_dict,
tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
zero_indexed_groundtruth_classes, num_classes)
if fields.InputDataFields.groundtruth_confidences in tensor_dict:
groundtruth_confidences = tensor_dict[
fields.InputDataFields.groundtruth_confidences]
tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
tf.sparse_to_dense(
zero_indexed_groundtruth_classes,
[num_classes],
groundtruth_confidences,
validate_indices=False))
else:
groundtruth_confidences = tf.ones_like(
zero_indexed_groundtruth_classes, dtype=tf.float32)
tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
tensor_dict[fields.InputDataFields.groundtruth_classes])
if merge_multiple_boxes:
merged_boxes, merged_classes, _ = util_ops.merge_boxes_with_multiple_labels(
tensor_dict[fields.InputDataFields.groundtruth_boxes],
zero_indexed_groundtruth_classes, num_classes)
merged_boxes, merged_classes, merged_confidences, _ = (
util_ops.merge_boxes_with_multiple_labels(
tensor_dict[fields.InputDataFields.groundtruth_boxes],
zero_indexed_groundtruth_classes,
groundtruth_confidences,
num_classes))
merged_classes = tf.cast(merged_classes, tf.float32)
tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
merged_confidences)
return tensor_dict
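For reference, a minimal sketch of the `tf.sparse_to_dense` scatter used above for groundtruth confidences (TF 1.x assumed; values are illustrative): per-box confidences land at their class indices in a `[num_classes]` vector.

```python
# Minimal sketch: zero-indexed classes [0, 2] with confidences [0.5, 0.9]
# scatter into a [num_classes] vector as [0.5, 0.0, 0.9].
import tensorflow as tf

zero_indexed_classes = tf.constant([0, 2], dtype=tf.int32)
confidences = tf.constant([0.5, 0.9], dtype=tf.float32)
dense_confidences = tf.sparse_to_dense(
    zero_indexed_classes, [3], confidences, validate_indices=False)
with tf.Session() as sess:
  print(sess.run(dense_confidences))  # [0.5 0.  0.9]
```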
......@@ -174,6 +200,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
fields.InputDataFields.image: [
height, width, 3 + num_additional_channels
],
fields.InputDataFields.original_image_spatial_shape: [2],
fields.InputDataFields.image_additional_channels: [
height, width, num_additional_channels
],
......@@ -183,6 +210,8 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
fields.InputDataFields.groundtruth_confidences: [
max_num_boxes, num_classes],
fields.InputDataFields.groundtruth_instance_masks: [
max_num_boxes, height, width
],
......@@ -198,11 +227,12 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
max_num_boxes, num_classes + 1 if num_classes is not None else None
],
fields.InputDataFields.groundtruth_image_classes: [num_classes],
fields.InputDataFields.groundtruth_image_confidences: [num_classes],
}
if fields.InputDataFields.original_image in tensor_dict:
padding_shapes[fields.InputDataFields.original_image] = [
None, None, 3 + num_additional_channels
height, width, 3 + num_additional_channels
]
if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
tensor_shape = (
......@@ -252,9 +282,12 @@ def augment_input_data(tensor_dict, data_augmentation_options):
in tensor_dict)
include_keypoints = (fields.InputDataFields.groundtruth_keypoints
in tensor_dict)
include_label_scores = (fields.InputDataFields.groundtruth_confidences in
tensor_dict)
tensor_dict = preprocessor.preprocess(
tensor_dict, data_augmentation_options,
func_arg_map=preprocessor.get_default_func_arg_map(
include_label_scores=include_label_scores,
include_instance_masks=include_instance_masks,
include_keypoints=include_keypoints))
tensor_dict[fields.InputDataFields.image] = tf.squeeze(
......@@ -275,6 +308,7 @@ def _get_labels_dict(input_dict):
labels_dict[key] = input_dict[key]
optional_label_keys = [
fields.InputDataFields.groundtruth_confidences,
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_instance_masks,
fields.InputDataFields.groundtruth_area,
......@@ -291,16 +325,50 @@ def _get_labels_dict(input_dict):
return labels_dict
def _replace_empty_string_with_random_number(string_tensor):
"""Returns string unchanged if non-empty, and random string tensor otherwise.
The random string is an integer between 0 and 2**63 - 1, cast as a string.
Args:
string_tensor: A tf.tensor of dtype string.
Returns:
out_string: A tf.tensor of dtype string. If string_tensor contains the empty
string, out_string will contain a random integer cast to a string.
Otherwise string_tensor is returned unchanged.
"""
empty_string = tf.constant('', dtype=tf.string, name='EmptyString')
random_source_id = tf.as_string(
tf.random_uniform(shape=[], maxval=2**63 - 1, dtype=tf.int64))
out_string = tf.cond(
tf.equal(string_tensor, empty_string),
true_fn=lambda: random_source_id,
false_fn=lambda: string_tensor)
return out_string
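A quick usage sketch of how this feeds the source-id hashing below; the bucket count here is assumed for illustration, the module defines its own `HASH_BINS` constant.

```python
# Illustrative sketch (TF 1.x): an empty source id is randomized, then
# hashed into an integer bucket.
import tensorflow as tf

HASH_BINS = 2 ** 31 - 1  # assumed value; the module defines its own constant
source_id = _replace_empty_string_with_random_number(tf.constant(''))
hash_from_source_id = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)
with tf.Session() as sess:
  print(sess.run(hash_from_source_id))  # e.g. 1405033530
```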
def _get_features_dict(input_dict):
"""Extracts features dict from input dict."""
hash_from_source_id = tf.string_to_hash_bucket_fast(
input_dict[fields.InputDataFields.source_id], HASH_BINS)
source_id = _replace_empty_string_with_random_number(
input_dict[fields.InputDataFields.source_id])
hash_from_source_id = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)
features = {
fields.InputDataFields.image:
input_dict[fields.InputDataFields.image],
HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
fields.InputDataFields.true_image_shape:
input_dict[fields.InputDataFields.true_image_shape]
input_dict[fields.InputDataFields.true_image_shape],
fields.InputDataFields.original_image_spatial_shape:
input_dict[fields.InputDataFields.original_image_spatial_shape]
}
if fields.InputDataFields.original_image in input_dict:
features[fields.InputDataFields.original_image] = input_dict[
......@@ -392,7 +460,8 @@ def create_train_input_fn(train_config, train_input_config,
num_classes=config_util.get_number_of_classes(model_config),
data_augmentation_fn=data_augmentation_fn,
merge_multiple_boxes=train_config.merge_multiple_label_boxes,
retain_original_image=train_config.retain_original_images)
retain_original_image=train_config.retain_original_images,
use_bfloat16=train_config.use_bfloat16)
tensor_dict = pad_input_data_to_static_shapes(
tensor_dict=transform_data_fn(tensor_dict),
......@@ -414,8 +483,6 @@ def create_train_input_fn(train_config, train_input_config,
def create_eval_input_fn(eval_config, eval_input_config, model_config):
"""Creates an eval `input` function for `Estimator`.
# TODO(ronnyvotel,rathodv): Allow batch sizes of more than 1 for eval.
Args:
eval_config: An eval_pb2.EvalConfig.
eval_input_config: An input_reader_pb2.InputReader.
......@@ -497,7 +564,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict))
dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
eval_input_config,
batch_size=1, # Currently only support batch size of 1 for eval.
batch_size=params['batch_size'] if params else eval_config.batch_size,
transform_input_data_fn=transform_and_pad_input_data_fn)
return dataset
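A hedged sketch of how the new batch-size resolution plays out, assuming the returned input function accepts the Estimator-provided `params` dict:

```python
# Hedged sketch: under TPUEstimator, params['batch_size'] is injected per
# host and takes precedence; otherwise the new eval_config.batch_size field
# is used. Config key names follow the tests below.
eval_input_fn = inputs.create_eval_input_fn(
    configs['eval_config'], configs['eval_input_configs'][0],
    configs['model'])
features, labels = eval_input_fn(params={'batch_size': 8})
```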
......
......@@ -20,6 +20,7 @@ from __future__ import print_function
import functools
import os
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
......@@ -28,6 +29,7 @@ from object_detection import inputs
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
from object_detection.utils import test_case
FLAGS = tf.flags.FLAGS
......@@ -41,11 +43,13 @@ def _get_configs_for_model(model_name):
data_path = os.path.join(tf.resource_loader.get_data_files_path(),
'test_data/pets_examples.record')
configs = config_util.get_configs_from_pipeline_file(fname)
override_dict = {
'train_input_path': data_path,
'eval_input_path': data_path,
'label_map_path': label_map_path
}
return config_util.merge_external_params_with_configs(
configs,
train_input_path=data_path,
eval_input_path=data_path,
label_map_path=label_map_path)
configs, kwargs_dict=override_dict)
def _make_initializable_iterator(dataset):
......@@ -62,7 +66,7 @@ def _make_initializable_iterator(dataset):
return iterator
class InputsTest(tf.test.TestCase):
class InputsTest(test_case.TestCase, parameterized.TestCase):
def test_faster_rcnn_resnet50_train_input(self):
"""Tests the training input function for FasterRcnnResnet50."""
......@@ -89,52 +93,71 @@ class InputsTest(tf.test.TestCase):
labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_classes].dtype)
self.assertAllEqual(
[1, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
self.assertEqual(
tf.float32,
labels[fields.InputDataFields.groundtruth_confidences].dtype)
self.assertAllEqual(
[1, 100],
labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_weights].dtype)
def test_faster_rcnn_resnet50_eval_input(self):
@parameterized.parameters(
{'eval_batch_size': 1},
{'eval_batch_size': 8}
)
def test_faster_rcnn_resnet50_eval_input(self, eval_batch_size=1):
"""Tests the eval input function for FasterRcnnResnet50."""
configs = _get_configs_for_model('faster_rcnn_resnet50_pets')
model_config = configs['model']
model_config.faster_rcnn.num_classes = 37
eval_config = configs['eval_config']
eval_config.batch_size = eval_batch_size
eval_input_fn = inputs.create_eval_input_fn(
configs['eval_config'], configs['eval_input_config'], model_config)
eval_config, configs['eval_input_configs'][0], model_config)
features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
self.assertAllEqual([1, None, None, 3],
self.assertAllEqual([eval_batch_size, None, None, 3],
features[fields.InputDataFields.image].shape.as_list())
self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
self.assertAllEqual(
[1, None, None, 3],
[eval_batch_size, None, None, 3],
features[fields.InputDataFields.original_image].shape.as_list())
self.assertEqual(tf.uint8,
features[fields.InputDataFields.original_image].dtype)
self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
self.assertAllEqual([eval_batch_size],
features[inputs.HASH_KEY].shape.as_list())
self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
self.assertAllEqual(
[1, 100, 4],
[eval_batch_size, 100, 4],
labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_boxes].dtype)
self.assertAllEqual(
[1, 100, model_config.faster_rcnn.num_classes],
[eval_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_classes].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
self.assertEqual(
tf.float32,
labels[fields.InputDataFields.groundtruth_confidences].dtype)
self.assertAllEqual(
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_area].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_area].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
self.assertEqual(
tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
self.assertEqual(
tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
......@@ -170,52 +193,73 @@ class InputsTest(tf.test.TestCase):
labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_classes].dtype)
self.assertAllEqual(
[batch_size, 100, model_config.ssd.num_classes],
labels[
fields.InputDataFields.groundtruth_confidences].shape.as_list())
self.assertEqual(
tf.float32,
labels[fields.InputDataFields.groundtruth_confidences].dtype)
self.assertAllEqual(
[batch_size, 100],
labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_weights].dtype)
def test_ssd_inceptionV2_eval_input(self):
@parameterized.parameters(
{'eval_batch_size': 1},
{'eval_batch_size': 8}
)
def test_ssd_inceptionV2_eval_input(self, eval_batch_size=1):
"""Tests the eval input function for SSDInceptionV2."""
configs = _get_configs_for_model('ssd_inception_v2_pets')
model_config = configs['model']
model_config.ssd.num_classes = 37
eval_config = configs['eval_config']
eval_config.batch_size = eval_batch_size
eval_input_fn = inputs.create_eval_input_fn(
configs['eval_config'], configs['eval_input_config'], model_config)
eval_config, configs['eval_input_configs'][0], model_config)
features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
self.assertAllEqual([1, 300, 300, 3],
self.assertAllEqual([eval_batch_size, 300, 300, 3],
features[fields.InputDataFields.image].shape.as_list())
self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
self.assertAllEqual(
[1, None, None, 3],
[eval_batch_size, 300, 300, 3],
features[fields.InputDataFields.original_image].shape.as_list())
self.assertEqual(tf.uint8,
features[fields.InputDataFields.original_image].dtype)
self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
self.assertAllEqual([eval_batch_size],
features[inputs.HASH_KEY].shape.as_list())
self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
self.assertAllEqual(
[1, 100, 4],
[eval_batch_size, 100, 4],
labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_boxes].dtype)
self.assertAllEqual(
[1, 100, model_config.ssd.num_classes],
[eval_batch_size, 100, model_config.ssd.num_classes],
labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_classes].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100, model_config.ssd.num_classes],
labels[
fields.InputDataFields.groundtruth_confidences].shape.as_list())
self.assertEqual(
tf.float32,
labels[fields.InputDataFields.groundtruth_confidences].dtype)
self.assertAllEqual(
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_area].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_area].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
self.assertEqual(
tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
self.assertAllEqual(
[1, 100],
[eval_batch_size, 100],
labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
self.assertEqual(
tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
......@@ -225,7 +269,7 @@ class InputsTest(tf.test.TestCase):
configs = _get_configs_for_model('ssd_inception_v2_pets')
predict_input_fn = inputs.create_predict_input_fn(
model_config=configs['model'],
predict_input_config=configs['eval_input_config'])
predict_input_config=configs['eval_input_configs'][0])
serving_input_receiver = predict_input_fn()
image = serving_input_receiver.features[fields.InputDataFields.image]
......@@ -238,10 +282,10 @@ class InputsTest(tf.test.TestCase):
def test_predict_input_with_additional_channels(self):
"""Tests the predict input function with additional channels."""
configs = _get_configs_for_model('ssd_inception_v2_pets')
configs['eval_input_config'].num_additional_channels = 2
configs['eval_input_configs'][0].num_additional_channels = 2
predict_input_fn = inputs.create_predict_input_fn(
model_config=configs['model'],
predict_input_config=configs['eval_input_config'])
predict_input_config=configs['eval_input_configs'][0])
serving_input_receiver = predict_input_fn()
image = serving_input_receiver.features[fields.InputDataFields.image]
......@@ -291,7 +335,7 @@ class InputsTest(tf.test.TestCase):
configs['model'].ssd.num_classes = 37
eval_input_fn = inputs.create_eval_input_fn(
eval_config=configs['train_config'], # Expecting `EvalConfig`.
eval_input_config=configs['eval_input_config'],
eval_input_config=configs['eval_input_configs'][0],
model_config=configs['model'])
with self.assertRaises(TypeError):
eval_input_fn()
......@@ -313,13 +357,45 @@ class InputsTest(tf.test.TestCase):
configs['model'].ssd.num_classes = 37
eval_input_fn = inputs.create_eval_input_fn(
eval_config=configs['eval_config'],
eval_input_config=configs['eval_input_config'],
eval_input_config=configs['eval_input_configs'][0],
model_config=configs['eval_config']) # Expecting `DetectionModel`.
with self.assertRaises(TypeError):
eval_input_fn()
def test_output_equal_in_replace_empty_string_with_random_number(self):
string_placeholder = tf.placeholder(tf.string, shape=[])
replaced_string = inputs._replace_empty_string_with_random_number(
string_placeholder)
test_string = 'hello world'
feed_dict = {string_placeholder: test_string}
with self.test_session() as sess:
out_string = sess.run(replaced_string, feed_dict=feed_dict)
self.assertEqual(test_string, out_string)
def test_output_is_integer_in_replace_empty_string_with_random_number(self):
string_placeholder = tf.placeholder(tf.string, shape=[])
replaced_string = inputs._replace_empty_string_with_random_number(
string_placeholder)
empty_string = ''
feed_dict = {string_placeholder: empty_string}
tf.set_random_seed(0)
with self.test_session() as sess:
out_string = sess.run(replaced_string, feed_dict=feed_dict)
# Test whether out_string is a string which represents an integer.
int(out_string) # throws an error if out_string is not castable to int.
self.assertEqual(out_string, '2798129067578209328')
class DataAugmentationFnTest(tf.test.TestCase):
class DataAugmentationFnTest(test_case.TestCase):
def test_apply_image_and_box_augmentation(self):
data_augmentation_options = [
......@@ -352,6 +428,50 @@ class DataAugmentationFnTest(tf.test.TestCase):
[[10, 10, 20, 20]]
)
def test_apply_image_and_box_augmentation_with_scores(self):
data_augmentation_options = [
(preprocessor.resize_image, {
'new_height': 20,
'new_width': 20,
'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR
}),
(preprocessor.scale_boxes_to_pixel_coordinates, {}),
]
data_augmentation_fn = functools.partial(
inputs.augment_input_data,
data_augmentation_options=data_augmentation_options)
tensor_dict = {
fields.InputDataFields.image:
tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
fields.InputDataFields.groundtruth_boxes:
tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)),
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([1.0], np.float32)),
fields.InputDataFields.groundtruth_confidences:
tf.constant(np.array([0.8], np.float32)),
}
augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
with self.test_session() as sess:
augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
self.assertAllEqual(
augmented_tensor_dict_out[fields.InputDataFields.image].shape,
[20, 20, 3]
)
self.assertAllClose(
augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes],
[[10, 10, 20, 20]]
)
self.assertAllClose(
augmented_tensor_dict_out[fields.InputDataFields.groundtruth_classes],
[1.0]
)
self.assertAllClose(
augmented_tensor_dict_out[
fields.InputDataFields.groundtruth_confidences],
[0.8]
)
def test_include_masks_in_data_augmentation(self):
data_augmentation_options = [
(preprocessor.resize_image, {
......@@ -425,7 +545,7 @@ def _fake_image_resizer_fn(image, mask):
return (image, mask, tf.shape(image))
class DataTransformationFnTest(tf.test.TestCase):
class DataTransformationFnTest(test_case.TestCase):
def test_combine_additional_channels_if_present(self):
image = np.random.rand(4, 4, 3).astype(np.float32)
......@@ -476,6 +596,9 @@ class DataTransformationFnTest(tf.test.TestCase):
self.assertAllClose(
transformed_inputs[fields.InputDataFields.groundtruth_classes],
[[0, 0, 1], [1, 0, 0]])
self.assertAllClose(
transformed_inputs[fields.InputDataFields.groundtruth_confidences],
[[0, 0, 1], [1, 0, 0]])
def test_returns_correct_merged_boxes(self):
tensor_dict = {
......@@ -504,6 +627,9 @@ class DataTransformationFnTest(tf.test.TestCase):
self.assertAllClose(
transformed_inputs[fields.InputDataFields.groundtruth_classes],
[[1, 0, 1]])
self.assertAllClose(
transformed_inputs[fields.InputDataFields.groundtruth_confidences],
[[1, 0, 1]])
def test_returns_resized_masks(self):
tensor_dict = {
......@@ -512,8 +638,11 @@ class DataTransformationFnTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_instance_masks:
tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([3, 1], np.int32))
tf.constant(np.array([3, 1], np.int32)),
fields.InputDataFields.original_image_spatial_shape:
tf.constant(np.array([4, 4], np.int32))
}
def fake_image_resizer_fn(image, masks=None):
resized_image = tf.image.resize_images(image, [8, 8])
results = [resized_image]
......@@ -538,7 +667,9 @@ class DataTransformationFnTest(tf.test.TestCase):
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].dtype, tf.uint8)
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].shape, [4, 4, 3])
fields.InputDataFields.original_image_spatial_shape], [4, 4])
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.original_image].shape, [8, 8, 3])
self.assertAllEqual(transformed_inputs[
fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
......@@ -550,6 +681,7 @@ class DataTransformationFnTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([3, 1], np.int32))
}
def fake_model_preprocessor_fn(image):
return (image / 255., tf.expand_dims(tf.shape(image)[1:], axis=0))
......@@ -577,6 +709,7 @@ class DataTransformationFnTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([3, 1], np.int32))
}
def add_one_data_augmentation_fn(tensor_dict):
return {key: value + 1 for key, value in tensor_dict.items()}
......@@ -605,8 +738,10 @@ class DataTransformationFnTest(tf.test.TestCase):
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([3, 1], np.int32))
}
def mul_two_model_preprocessor_fn(image):
return (image * 2, tf.expand_dims(tf.shape(image)[1:], axis=0))
def add_five_to_image_data_augmentation_fn(tensor_dict):
tensor_dict[fields.InputDataFields.image] += 5
return tensor_dict
......@@ -626,7 +761,7 @@ class DataTransformationFnTest(tf.test.TestCase):
(np_image + 5) * 2)
class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
class PadInputDataToStaticShapesFnTest(test_case.TestCase):
def test_pad_images_boxes_and_classes(self):
input_tensor_dict = {
......@@ -636,7 +771,10 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
tf.placeholder(tf.float32, [None, 4]),
fields.InputDataFields.groundtruth_classes:
tf.placeholder(tf.int32, [None, 3]),
fields.InputDataFields.true_image_shape: tf.placeholder(tf.int32, [3]),
fields.InputDataFields.true_image_shape:
tf.placeholder(tf.int32, [3]),
fields.InputDataFields.original_image_spatial_shape:
tf.placeholder(tf.int32, [2])
}
padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
tensor_dict=input_tensor_dict,
......@@ -650,6 +788,9 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
self.assertAllEqual(
padded_tensor_dict[fields.InputDataFields.true_image_shape]
.shape.as_list(), [3])
self.assertAllEqual(
padded_tensor_dict[fields.InputDataFields.original_image_spatial_shape]
.shape.as_list(), [2])
self.assertAllEqual(
padded_tensor_dict[fields.InputDataFields.groundtruth_boxes]
.shape.as_list(), [3, 4])
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Evaluation executable for detection models.
This executable is used to evaluate DetectionModels. There are two ways of
......@@ -54,29 +53,30 @@ from object_detection.legacy import evaluator
from object_detection.utils import config_util
from object_detection.utils import label_map_util
tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
flags.DEFINE_string('checkpoint_dir', '',
'Directory containing checkpoints to evaluate, typically '
'set to `train_dir` used in the training job.')
flags.DEFINE_string('eval_dir', '',
'Directory to write eval summaries to.')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file. If provided, other configs are ignored')
flags.DEFINE_string(
'checkpoint_dir', '',
'Directory containing checkpoints to evaluate, typically '
'set to `train_dir` used in the training job.')
flags.DEFINE_string('eval_dir', '', 'Directory to write eval summaries to.')
flags.DEFINE_string(
'pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file. If provided, other configs are ignored')
flags.DEFINE_string('eval_config_path', '',
'Path to an eval_pb2.EvalConfig config file.')
flags.DEFINE_string('input_config_path', '',
'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
'Path to a model_pb2.DetectionModel config file.')
flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
'evaluation. Overrides the `max_evals` parameter in the '
'provided config.')
flags.DEFINE_boolean(
'run_once', False, 'Option to only run a single pass of '
'evaluation. Overrides the `max_evals` parameter in the '
'provided config.')
FLAGS = flags.FLAGS
......@@ -88,9 +88,10 @@ def main(unused_argv):
if FLAGS.pipeline_config_path:
configs = config_util.get_configs_from_pipeline_file(
FLAGS.pipeline_config_path)
tf.gfile.Copy(FLAGS.pipeline_config_path,
os.path.join(FLAGS.eval_dir, 'pipeline.config'),
overwrite=True)
tf.gfile.Copy(
FLAGS.pipeline_config_path,
os.path.join(FLAGS.eval_dir, 'pipeline.config'),
overwrite=True)
else:
configs = config_util.get_configs_from_multiple_files(
model_config_path=FLAGS.model_config_path,
......@@ -99,9 +100,7 @@ def main(unused_argv):
for name, config in [('model.config', FLAGS.model_config_path),
('eval.config', FLAGS.eval_config_path),
('input.config', FLAGS.input_config_path)]:
tf.gfile.Copy(config,
os.path.join(FLAGS.eval_dir, name),
overwrite=True)
tf.gfile.Copy(config, os.path.join(FLAGS.eval_dir, name), overwrite=True)
model_config = configs['model']
eval_config = configs['eval_config']
......@@ -110,9 +109,7 @@ def main(unused_argv):
input_config = configs['train_input_config']
model_fn = functools.partial(
model_builder.build,
model_config=model_config,
is_training=False)
model_builder.build, model_config=model_config, is_training=False)
def get_next(config):
return dataset_builder.make_initializable_iterator(
......@@ -120,10 +117,8 @@ def main(unused_argv):
create_input_dict_fn = functools.partial(get_next, input_config)
label_map = label_map_util.load_labelmap(input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes)
categories = label_map_util.create_categories_from_labelmap(
input_config.label_map_path)
if FLAGS.run_once:
eval_config.max_evals = 1
......
......@@ -273,6 +273,7 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
master=eval_config.eval_master,
save_graph=eval_config.save_graph,
save_graph_dir=(eval_dir if eval_config.save_graph else ''),
losses_dict=losses_dict)
losses_dict=losses_dict,
eval_export_path=eval_config.export_path)
return metrics
......@@ -99,17 +99,19 @@ class ArgMaxMatcher(matcher.Matcher):
if self._unmatched_threshold == self._matched_threshold:
raise ValueError('When negatives are in between matched and '
'unmatched thresholds, these cannot be of equal '
'value. matched: %s, unmatched: %s',
self._matched_threshold, self._unmatched_threshold)
'value. matched: {}, unmatched: {}'.format(
self._matched_threshold,
self._unmatched_threshold))
self._force_match_for_each_row = force_match_for_each_row
self._negatives_lower_than_unmatched = negatives_lower_than_unmatched
def _match(self, similarity_matrix):
def _match(self, similarity_matrix, valid_rows):
"""Tries to match each column of the similarity matrix to a row.
Args:
similarity_matrix: tensor of shape [N, M] representing any similarity
metric.
valid_rows: a boolean tensor of shape [N] indicating valid rows.
Returns:
Match object with corresponding matches for each of M columns.
......@@ -167,8 +169,10 @@ class ArgMaxMatcher(matcher.Matcher):
similarity_matrix)
force_match_column_ids = tf.argmax(similarity_matrix, 1,
output_type=tf.int32)
force_match_column_indicators = tf.one_hot(
force_match_column_ids, depth=similarity_matrix_shape[1])
force_match_column_indicators = (
tf.one_hot(
force_match_column_ids, depth=similarity_matrix_shape[1]) *
tf.cast(tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32))
force_match_row_ids = tf.argmax(force_match_column_indicators, 0,
output_type=tf.int32)
force_match_column_mask = tf.cast(
......
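A small numeric sketch of the force-match masking above (TF 1.x): one-hot force-match indicators from padded (invalid) groundtruth rows are zeroed by `valid_rows`, so padding can never force a match.

```python
import tensorflow as tf

similarity = tf.constant([[1., 3., 1.],
                          [2., 0., 0.]])  # second row is padding
valid_rows = tf.constant([True, False])
force_match_column_ids = tf.argmax(similarity, 1, output_type=tf.int32)
force_match_column_indicators = (
    tf.one_hot(force_match_column_ids, depth=3) *
    tf.cast(tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32))
with tf.Session() as sess:
  print(sess.run(force_match_column_indicators))
  # [[0. 1. 0.]
  #  [0. 0. 0.]]  -> the padded row forces no match
```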
......@@ -182,6 +182,34 @@ class ArgMaxMatcherTest(test_case.TestCase):
self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
expected_unmatched_cols)
def test_return_correct_matches_using_force_match_padded_groundtruth(self):
def graph_fn(similarity, valid_rows):
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3.,
unmatched_threshold=2.,
force_match_for_each_row=True)
match = matcher.match(similarity, valid_rows)
matched_cols = match.matched_column_indicator()
unmatched_cols = match.unmatched_column_indicator()
match_results = match.match_results
return (matched_cols, unmatched_cols, match_results)
similarity = np.array([[1, 1, 1, 3, 1],
[-1, 0, -2, -2, -1],
[0, 0, 0, 0, 0],
[3, 0, -1, 2, 0],
[0, 0, 0, 0, 0]], dtype=np.float32)
valid_rows = np.array([True, True, False, True, False])
expected_matched_cols = np.array([0, 1, 3])
expected_matched_rows = np.array([3, 1, 0])
expected_unmatched_cols = np.array([2, 4])  # max sim below unmatched threshold
(res_matched_cols, res_unmatched_cols,
match_results) = self.execute(graph_fn, [similarity, valid_rows])
self.assertAllEqual(match_results[res_matched_cols], expected_matched_rows)
self.assertAllEqual(np.nonzero(res_matched_cols)[0], expected_matched_cols)
self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
expected_unmatched_cols)
def test_valid_arguments_corner_case(self):
argmax_matcher.ArgMaxMatcher(matched_threshold=1,
unmatched_threshold=1)
......
......@@ -35,7 +35,7 @@ class GreedyBipartiteMatcher(matcher.Matcher):
super(GreedyBipartiteMatcher, self).__init__(
use_matmul_gather=use_matmul_gather)
def _match(self, similarity_matrix, num_valid_rows=-1):
def _match(self, similarity_matrix, valid_rows):
"""Bipartite matches a collection rows and columns. A greedy bi-partite.
TODO(rathodv): Add num_valid_columns options to match only that many columns
......@@ -44,21 +44,27 @@ class GreedyBipartiteMatcher(matcher.Matcher):
Args:
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher values mean more similar.
num_valid_rows: A scalar or a 1-D tensor with one element describing the
number of valid rows of similarity_matrix to consider for the bipartite
matching. If set to be negative, then all rows from similarity_matrix
are used.
valid_rows: A boolean tensor of shape [N] indicating the rows that are
valid.
Returns:
match_results: int32 tensor of shape [M] with match_results[i]=-1
meaning that column i is not matched and otherwise that it is matched to
row match_results[i].
"""
valid_row_sim_matrix = tf.gather(similarity_matrix,
tf.squeeze(tf.where(valid_rows), axis=-1))
invalid_row_sim_matrix = tf.gather(
similarity_matrix,
tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
similarity_matrix = tf.concat(
[valid_row_sim_matrix, invalid_row_sim_matrix], axis=0)
# Convert similarity matrix to distance matrix as bipartite_match finds
# minimum distance matches.
distance_matrix = -1 * similarity_matrix
num_valid_rows = tf.reduce_sum(tf.to_float(valid_rows))
_, match_results = image_ops.bipartite_match(
distance_matrix, num_valid_rows)
distance_matrix, num_valid_rows=num_valid_rows)
match_results = tf.reshape(match_results, [-1])
match_results = tf.cast(match_results, tf.int32)
return match_results
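The gather/concat shuffle above can be sanity-checked in isolation; a small sketch (TF 1.x):

```python
# Sketch: valid rows are moved to the top so bipartite_match, which only
# considers the first num_valid_rows rows, never sees a padded row first.
import tensorflow as tf

similarity = tf.constant([[0.15, 0.2, 0.3],    # padded row
                          [0.50, 0.1, 0.8]])   # valid row
valid_rows = tf.constant([False, True])
valid_part = tf.gather(
    similarity, tf.squeeze(tf.where(valid_rows), axis=-1))
invalid_part = tf.gather(
    similarity, tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
reordered = tf.concat([valid_part, invalid_part], axis=0)
with tf.Session() as sess:
  print(sess.run(reordered))  # [[0.5  0.1  0.8 ], [0.15 0.2  0.3 ]]
```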
......@@ -24,44 +24,54 @@ class GreedyBipartiteMatcherTest(tf.test.TestCase):
def test_get_expected_matches_when_all_rows_are_valid(self):
similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
num_valid_rows = 2
valid_rows = tf.ones([2], dtype=tf.bool)
expected_match_results = [-1, 1, 0]
matcher = bipartite_matcher.GreedyBipartiteMatcher()
match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
match = matcher.match(similarity_matrix, valid_rows=valid_rows)
with self.test_session() as sess:
match_results_out = sess.run(match._match_results)
self.assertAllEqual(match_results_out, expected_match_results)
def test_get_expected_matches_with_valid_rows_set_to_minus_one(self):
def test_get_expected_matches_with_all_rows_be_default(self):
similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
num_valid_rows = -1
expected_match_results = [-1, 1, 0]
matcher = bipartite_matcher.GreedyBipartiteMatcher()
match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
match = matcher.match(similarity_matrix)
with self.test_session() as sess:
match_results_out = sess.run(match._match_results)
self.assertAllEqual(match_results_out, expected_match_results)
def test_get_no_matches_with_zero_valid_rows(self):
similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
num_valid_rows = 0
valid_rows = tf.zeros([2], dtype=tf.bool)
expected_match_results = [-1, -1, -1]
matcher = bipartite_matcher.GreedyBipartiteMatcher()
match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
match = matcher.match(similarity_matrix, valid_rows)
with self.test_session() as sess:
match_results_out = sess.run(match._match_results)
self.assertAllEqual(match_results_out, expected_match_results)
def test_get_expected_matches_with_only_one_valid_row(self):
similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
num_valid_rows = 1
valid_rows = tf.constant([True, False], dtype=tf.bool)
expected_match_results = [-1, -1, 0]
matcher = bipartite_matcher.GreedyBipartiteMatcher()
match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
match = matcher.match(similarity_matrix, valid_rows)
with self.test_session() as sess:
match_results_out = sess.run(match._match_results)
self.assertAllEqual(match_results_out, expected_match_results)
def test_get_expected_matches_with_only_one_valid_row_at_bottom(self):
similarity_matrix = tf.constant([[0.15, 0.2, 0.3], [0.50, 0.1, 0.8]])
valid_rows = tf.constant([False, True], dtype=tf.bool)
expected_match_results = [-1, -1, 0]
matcher = bipartite_matcher.GreedyBipartiteMatcher()
match = matcher.match(similarity_matrix, valid_rows)
with self.test_session() as sess:
match_results_out = sess.run(match._match_results)
self.assertAllEqual(match_results_out, expected_match_results)
......
......@@ -103,7 +103,6 @@ from object_detection.core import box_list_ops
from object_detection.core import box_predictor
from object_detection.core import losses
from object_detection.core import model
from object_detection.core import post_processing
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.utils import ops
......@@ -234,11 +233,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
first_stage_box_predictor_depth,
first_stage_minibatch_size,
first_stage_sampler,
first_stage_nms_score_threshold,
first_stage_nms_iou_threshold,
first_stage_non_max_suppression_fn,
first_stage_max_proposals,
first_stage_localization_loss_weight,
first_stage_objectness_loss_weight,
crop_and_resize_fn,
initial_crop_size,
maxpool_kernel_size,
maxpool_stride,
......@@ -255,8 +254,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
hard_example_miner=None,
parallel_iterations=16,
add_summaries=True,
use_matmul_crop_and_resize=False,
clip_anchors_to_image=False):
clip_anchors_to_image=False,
use_static_shapes=False,
resize_masks=True):
"""FasterRCNNMetaArch Constructor.
Args:
......@@ -309,18 +309,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
to the loss function for any given image within the image batch and is
only called "batch_size" due to terminology from the Faster R-CNN paper.
first_stage_sampler: Sampler to use for first stage loss (RPN loss).
first_stage_nms_score_threshold: Score threshold for non max suppression
for the Region Proposal Network (RPN). This value is expected to be in
[0, 1] as it is applied directly after a softmax transformation. The
recommended value for Faster R-CNN is 0.
first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold
for performing Non-Max Suppression (NMS) on the boxes predicted by the
Region Proposal Network (RPN).
first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
callable that takes `boxes`, `scores` and optional `clip_window`(with
all other inputs already set) and returns a dictionary containing
tensors with keys: `detection_boxes`, `detection_scores`,
`detection_classes`, `num_detections`. This is used to perform non max
suppression on the boxes predicted by the Region Proposal Network
(RPN).
See `post_processing.batch_multiclass_non_max_suppression` for the type
and shape of these tensors.
first_stage_max_proposals: Maximum number of boxes to retain after
performing Non-Max Suppression (NMS) on the boxes predicted by the
Region Proposal Network (RPN).
first_stage_localization_loss_weight: A float
first_stage_objectness_loss_weight: A float
crop_and_resize_fn: A differentiable resampler to use for cropping RPN
proposal features.
initial_crop_size: A single integer indicating the output size
(width and height are set to be the same) of the initial bilinear
interpolation based cropping during ROI pooling.
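For context, a hedged sketch of how such a `first_stage_non_max_suppression_fn` callable is typically constructed: a partial of the core NMS op with everything except `boxes`, `scores`, and `clip_window` bound. Threshold and size values below are illustrative, not the library's defaults.

```python
import functools
from object_detection.core import post_processing

first_stage_nms_fn = functools.partial(
    post_processing.batch_multiclass_non_max_suppression,
    score_thresh=0.0,        # illustrative RPN score threshold
    iou_thresh=0.7,          # illustrative IOU threshold
    max_size_per_class=300,
    max_total_size=300)
```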
......@@ -367,12 +371,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
in parallel for calls to tf.map_fn.
add_summaries: boolean (default: True) controlling whether summary ops
should be added to tensorflow graph.
use_matmul_crop_and_resize: Force the use of matrix multiplication based
crop and resize instead of standard tf.image.crop_and_resize while
computing second stage input feature maps.
clip_anchors_to_image: Normally, anchors generated for a given image size
are pruned during training if they lie outside the image window. This
option clips the anchors to be within the image instead of pruning.
are pruned during training if they lie outside the image window. This
option clips the anchors to be within the image instead of pruning.
use_static_shapes: If True, uses implementation of ops with static shape
guarantees.
resize_masks: Indicates whether the masks present in the groundtruth
should be resized in the model with `image_resizer_fn`.
Raises:
ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
......@@ -384,9 +389,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
# in the future.
super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)
if is_training and second_stage_batch_size > first_stage_max_proposals:
raise ValueError('second_stage_batch_size should be no greater than '
'first_stage_max_proposals.')
if not isinstance(first_stage_anchor_generator,
grid_anchor_generator.GridAnchorGenerator):
raise ValueError('first_stage_anchor_generator must be of type '
......@@ -394,6 +396,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
self._is_training = is_training
self._image_resizer_fn = image_resizer_fn
self._resize_masks = resize_masks
self._feature_extractor = feature_extractor
self._number_of_stages = number_of_stages
......@@ -425,9 +428,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
min_depth=0,
max_depth=0))
self._first_stage_nms_score_threshold = first_stage_nms_score_threshold
self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold
self._first_stage_nms_fn = first_stage_non_max_suppression_fn
self._first_stage_max_proposals = first_stage_max_proposals
self._use_static_shapes = use_static_shapes
self._first_stage_localization_loss = (
losses.WeightedSmoothL1LocalizationLoss())
......@@ -437,6 +440,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight
# Per-region cropping parameters
self._crop_and_resize_fn = crop_and_resize_fn
self._initial_crop_size = initial_crop_size
self._maxpool_kernel_size = maxpool_kernel_size
self._maxpool_stride = maxpool_stride
......@@ -458,7 +462,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
self._second_stage_cls_loss_weight = second_stage_classification_loss_weight
self._second_stage_mask_loss_weight = (
second_stage_mask_prediction_loss_weight)
self._use_matmul_crop_and_resize = use_matmul_crop_and_resize
self._hard_example_miner = hard_example_miner
self._parallel_iterations = parallel_iterations
......@@ -673,9 +676,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
}
if self._number_of_stages >= 2:
# If mixed-precision training on TPU is enabled, rpn_box_encodings and
# rpn_objectness_predictions_with_background are bfloat16 tensors.
# As prediction results, they need to be cast to float32
# tensors for correct postprocess_rpn computation in predict_second_stage.
prediction_dict.update(self._predict_second_stage(
rpn_box_encodings,
rpn_objectness_predictions_with_background,
tf.to_float(rpn_box_encodings),
tf.to_float(rpn_objectness_predictions_with_background),
rpn_features_to_crop,
self._anchors.get(), image_shape, true_image_shapes))
......@@ -719,7 +726,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
[batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors. Note that this
tensor *includes* background class predictions (at class index 0).
rpn_features_to_crop: A 4-D float32 tensor with shape
rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
[batch_size, height, width, depth] representing image features to crop
using the proposal boxes predicted by the RPN.
anchors: 2-D float tensor of shape
......@@ -758,17 +765,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
boxes proposed by the RPN, thus enabling one to extract features and
get box classification and prediction for externally selected areas
of the image.
6) box_classifier_features: a 4-D float32 tensor representing the
features for each proposal.
6) box_classifier_features: a 4-D float32 or bfloat16 tensor
representing the features for each proposal.
"""
image_shape_2d = self._image_batch_shape_2d(image_shape)
proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
rpn_box_encodings, rpn_objectness_predictions_with_background,
anchors, image_shape_2d, true_image_shapes)
# If mixed-precision training on TPU is enabled, the dtype of
# rpn_features_to_crop is bfloat16, otherwise it is float32. tf.cast is
# used to match the dtype of proposal_boxes_normalized to that of
# rpn_features_to_crop for further computation.
flattened_proposal_feature_maps = (
self._compute_second_stage_input_feature_maps(
rpn_features_to_crop, proposal_boxes_normalized))
rpn_features_to_crop,
tf.cast(proposal_boxes_normalized, rpn_features_to_crop.dtype)))
box_classifier_features = (
self._feature_extractor.extract_box_classifier_features(
......@@ -956,8 +968,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
image_shape: A 1-D tensor representing the input image shape.
"""
image_shape = tf.shape(preprocessed_inputs)
rpn_features_to_crop, _ = self._feature_extractor.extract_proposal_features(
preprocessed_inputs, scope=self.first_stage_feature_extractor_scope)
rpn_features_to_crop, self.endpoints = (
self._feature_extractor.extract_proposal_features(
preprocessed_inputs,
scope=self.first_stage_feature_extractor_scope))
feature_map_shape = tf.shape(rpn_features_to_crop)
anchors = box_list_ops.concatenate(
......@@ -965,12 +980,15 @@ class FasterRCNNMetaArch(model.DetectionModel):
feature_map_shape[2])]))
with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()):
kernel_size = self._first_stage_box_predictor_kernel_size
reuse = tf.get_variable_scope().reuse
rpn_box_predictor_features = slim.conv2d(
rpn_features_to_crop,
self._first_stage_box_predictor_depth,
kernel_size=[kernel_size, kernel_size],
rate=self._first_stage_atrous_rate,
activation_fn=tf.nn.relu6)
activation_fn=tf.nn.relu6,
scope='Conv',
reuse=reuse)
return (rpn_box_predictor_features, rpn_features_to_crop,
anchors, image_shape)
......@@ -1223,14 +1241,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
rpn_objectness_predictions_with_background_batch)[:, :, 1]
clip_window = self._compute_clip_window(image_shapes)
(proposal_boxes, proposal_scores, _, _, _,
num_proposals) = post_processing.batch_multiclass_non_max_suppression(
num_proposals) = self._first_stage_nms_fn(
tf.expand_dims(proposal_boxes, axis=2),
tf.expand_dims(rpn_objectness_softmax_without_background,
axis=2),
self._first_stage_nms_score_threshold,
self._first_stage_nms_iou_threshold,
self._first_stage_max_proposals,
self._first_stage_max_proposals,
tf.expand_dims(rpn_objectness_softmax_without_background, axis=2),
clip_window=clip_window)
if self._is_training:
proposal_boxes = tf.stop_gradient(proposal_boxes)
......@@ -1377,16 +1390,19 @@ class FasterRCNNMetaArch(model.DetectionModel):
groundtruth_masks_list = self._groundtruth_lists.get(
fields.BoxListFields.masks)
if groundtruth_masks_list is not None:
# TODO(rathodv): Remove mask resizing once the legacy pipeline is deleted.
if groundtruth_masks_list is not None and self._resize_masks:
resized_masks_list = []
for mask in groundtruth_masks_list:
_, resized_mask, _ = self._image_resizer_fn(
# Reuse the given `image_resizer_fn` to resize groundtruth masks.
# `mask` tensor for an image is of the shape [num_masks,
# image_height, image_width]. Below we create a dummy image of
# shape [image_height, image_width, 1] to use with
# `image_resizer_fn`.
image=tf.zeros(tf.stack([tf.shape(mask)[1], tf.shape(mask)[2], 1])),
image=tf.zeros(tf.stack([tf.shape(mask)[1],
tf.shape(mask)[2], 1])),
masks=mask)
resized_masks_list.append(resized_mask)
......@@ -1443,11 +1459,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
tf.range(proposal_boxlist.num_boxes()) < num_valid_proposals,
cls_weights > 0
)
sampled_indices = self._second_stage_sampler.subsample(
selected_positions = self._second_stage_sampler.subsample(
valid_indicator,
self._second_stage_batch_size,
positive_indicator)
return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices)
return box_list_ops.boolean_mask(
proposal_boxlist,
selected_positions,
use_static_shapes=self._use_static_shapes,
indicator_sum=(self._second_stage_batch_size
if self._use_static_shapes else None))
def _compute_second_stage_input_feature_maps(self, features_to_crop,
proposal_boxes_normalized):
......@@ -1467,35 +1488,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
A float32 tensor with shape [K, new_height, new_width, depth].
"""
def get_box_inds(proposals):
proposals_shape = proposals.get_shape().as_list()
if any(dim is None for dim in proposals_shape):
proposals_shape = tf.shape(proposals)
ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
multiplier = tf.expand_dims(
tf.range(start=0, limit=proposals_shape[0]), 1)
return tf.reshape(ones_mat * multiplier, [-1])
if self._use_matmul_crop_and_resize:
def _single_image_crop_and_resize(inputs):
single_image_features_to_crop, proposal_boxes_normalized = inputs
return ops.matmul_crop_and_resize(
tf.expand_dims(single_image_features_to_crop, 0),
proposal_boxes_normalized,
[self._initial_crop_size, self._initial_crop_size])
cropped_regions = self._flatten_first_two_dimensions(
shape_utils.static_or_dynamic_map_fn(
_single_image_crop_and_resize,
elems=[features_to_crop, proposal_boxes_normalized],
dtype=tf.float32,
parallel_iterations=self._parallel_iterations))
else:
cropped_regions = tf.image.crop_and_resize(
features_to_crop,
self._flatten_first_two_dimensions(proposal_boxes_normalized),
get_box_inds(proposal_boxes_normalized),
(self._initial_crop_size, self._initial_crop_size))
cropped_regions = self._flatten_first_two_dimensions(
self._crop_and_resize_fn(
features_to_crop, proposal_boxes_normalized,
[self._initial_crop_size, self._initial_crop_size]))
return slim.max_pool2d(
cropped_regions,
[self._maxpool_kernel_size, self._maxpool_kernel_size],
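Since the branching on `use_matmul_crop_and_resize` moved out of the meta-arch, callers now inject a crop function directly. A hedged sketch of one possible implementation over `tf.image.crop_and_resize` (the real helpers live in `object_detection.utils.ops`; this wrapper is illustrative only):

```python
# Illustrative crop_and_resize_fn with the contract assumed by the call
# site above: features [batch, H, W, C], boxes [batch, num_boxes, 4],
# returning crops of shape [batch, num_boxes, crop_h, crop_w, C].
import tensorflow as tf

def crop_and_resize_fn(features, boxes, crop_size):
  batch = tf.shape(boxes)[0]
  num_boxes = tf.shape(boxes)[1]
  # Map every flattened box back to the image it came from.
  box_indices = tf.reshape(
      tf.tile(tf.expand_dims(tf.range(batch), 1), [1, num_boxes]), [-1])
  crops = tf.image.crop_and_resize(
      features, tf.reshape(boxes, [-1, 4]), box_indices, crop_size)
  return tf.reshape(
      crops, tf.concat([[batch, num_boxes], tf.shape(crops)[1:]], axis=0))
```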
......@@ -1738,11 +1734,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
sampled_reg_indices = tf.multiply(batch_sampled_indices,
batch_reg_weights)
losses_mask = None
if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
losses_mask = tf.stack(self.groundtruth_lists(
fields.InputDataFields.is_annotated))
localization_losses = self._first_stage_localization_loss(
rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices)
rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices,
losses_mask=losses_mask)
objectness_losses = self._first_stage_objectness_loss(
rpn_objectness_predictions_with_background,
batch_one_hot_targets, weights=batch_sampled_indices)
batch_one_hot_targets, weights=batch_sampled_indices,
losses_mask=losses_mask)
localization_loss = tf.reduce_mean(
tf.reduce_sum(localization_losses, axis=1) / normalizer)
objectness_loss = tf.reduce_mean(
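A minimal sketch of the effect of `losses_mask`, under the assumption that the loss base class zeroes out per-image losses for images whose `is_annotated` flag is False:

```python
# Sketch (TF 1.x, assumed semantics): losses from unannotated images
# contribute nothing to the total.
import tensorflow as tf

per_image_losses = tf.constant([[1.0, 2.0],
                                [3.0, 4.0]])
losses_mask = tf.constant([True, False])  # second image unannotated
masked_losses = tf.where(losses_mask, per_image_losses,
                         tf.zeros_like(per_image_losses))
with tf.Session() as sess:
  print(sess.run(masked_losses))  # [[1. 2.] [0. 0.]]
```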
......@@ -1866,32 +1868,32 @@ class FasterRCNNMetaArch(model.DetectionModel):
# for just one class to avoid over-counting for regression loss and
# (optionally) mask loss.
else:
# We only predict refined location encodings for the non background
# classes, but we now pad it to make it compatible with the class
# predictions
refined_box_encodings_with_background = tf.pad(
refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
refined_box_encodings_with_background,
tf.greater(one_hot_flat_cls_targets_with_background, 0))
reshaped_refined_box_encodings = tf.reshape(
refined_box_encodings_masked_by_class_targets,
[batch_size, self.max_num_proposals, self._box_coder.code_size])
reshaped_refined_box_encodings = (
self._get_refined_encodings_for_postitive_class(
refined_box_encodings,
one_hot_flat_cls_targets_with_background, batch_size))
losses_mask = None
if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
losses_mask = tf.stack(self.groundtruth_lists(
fields.InputDataFields.is_annotated))
second_stage_loc_losses = self._second_stage_localization_loss(
reshaped_refined_box_encodings,
batch_reg_targets, weights=batch_reg_weights) / normalizer
batch_reg_targets,
weights=batch_reg_weights,
losses_mask=losses_mask) / normalizer
second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
self._second_stage_classification_loss(
class_predictions_with_background,
batch_cls_targets_with_background,
weights=batch_cls_weights),
weights=batch_cls_weights,
losses_mask=losses_mask),
ndims=2) / normalizer
second_stage_loc_loss = tf.reduce_sum(
tf.boolean_mask(second_stage_loc_losses, paddings_indicator))
second_stage_loc_losses * tf.to_float(paddings_indicator))
second_stage_cls_loss = tf.reduce_sum(
tf.boolean_mask(second_stage_cls_losses, paddings_indicator))
second_stage_cls_losses * tf.to_float(paddings_indicator))
if self._hard_example_miner:
(second_stage_loc_loss, second_stage_cls_loss
......@@ -1954,10 +1956,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
box_list.BoxList(tf.reshape(proposal_boxes, [-1, 4])),
image_shape[1], image_shape[2]).get()
flat_cropped_gt_mask = tf.image.crop_and_resize(
flat_cropped_gt_mask = self._crop_and_resize_fn(
tf.expand_dims(flat_gt_masks, -1),
flat_normalized_proposals,
tf.range(flat_normalized_proposals.shape[0].value),
tf.expand_dims(flat_normalized_proposals, axis=1),
[mask_height, mask_width])
batch_cropped_gt_mask = tf.reshape(
......@@ -1968,14 +1969,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
self._second_stage_mask_loss(
reshaped_prediction_masks,
batch_cropped_gt_mask,
weights=batch_mask_target_weights),
weights=batch_mask_target_weights,
losses_mask=losses_mask),
ndims=2) / (
mask_height * mask_width * tf.maximum(
tf.reduce_sum(
batch_mask_target_weights, axis=1, keep_dims=True
), tf.ones((batch_size, 1))))
second_stage_mask_loss = tf.reduce_sum(
tf.boolean_mask(second_stage_mask_losses, paddings_indicator))
tf.where(paddings_indicator, second_stage_mask_losses,
tf.zeros_like(second_stage_mask_losses)))
if second_stage_mask_loss is not None:
mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
......@@ -1983,6 +1986,29 @@ class FasterRCNNMetaArch(model.DetectionModel):
loss_dict[mask_loss.op.name] = mask_loss
return loss_dict
def _get_refined_encodings_for_postitive_class(
self, refined_box_encodings, flat_cls_targets_with_background,
batch_size):
# We only predict refined location encodings for the non background
# classes, but we now pad it to make it compatible with the class
# predictions
refined_box_encodings_with_background = tf.pad(refined_box_encodings,
[[0, 0], [1, 0], [0, 0]])
refined_box_encodings_masked_by_class_targets = (
box_list_ops.boolean_mask(
box_list.BoxList(
tf.reshape(refined_box_encodings_with_background,
[-1, self._box_coder.code_size])),
tf.reshape(tf.greater(flat_cls_targets_with_background, 0), [-1]),
use_static_shapes=self._use_static_shapes,
indicator_sum=batch_size * self.max_num_proposals
if self._use_static_shapes else None).get())
return tf.reshape(
refined_box_encodings_masked_by_class_targets, [
batch_size, self.max_num_proposals,
self._box_coder.code_size
])
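A simplified toy version of the selection this method performs (the real code routes through `box_list_ops.boolean_mask` for static-shape support; shapes here are assumed for illustration):

```python
# Toy sketch: two proposals, two foreground classes, code_size=2. Padding
# inserts a background class at index 0, then the one-hot targets pick
# exactly one encoding per proposal.
import tensorflow as tf

refined = tf.constant([[[1., 1.], [2., 2.]],
                       [[3., 3.], [4., 4.]]])          # [2, 2, 2]
padded = tf.pad(refined, [[0, 0], [1, 0], [0, 0]])     # background at row 0
one_hot_targets = tf.constant([[0, 0, 1],
                               [0, 1, 0]])             # class 2, class 1
selected = tf.boolean_mask(padded, tf.greater(one_hot_targets, 0))
with tf.Session() as sess:
  print(sess.run(selected))  # [[2. 2.] [3. 3.]]
```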
def _padded_batched_proposals_indicator(self,
num_proposals,
max_num_proposals):
......