Commit 27b4acd4 authored by Aman Gupta

Merge remote-tracking branch 'upstream/master'

parents 5133522f d4e1f97f
@@ -93,8 +93,7 @@ class TargetAssigner(object):
              groundtruth_boxes,
              groundtruth_labels=None,
              unmatched_class_label=None,
-             groundtruth_weights=None,
-             **params):
+             groundtruth_weights=None):
    """Assign classification and regression targets to each anchor.

    For a given set of anchors and groundtruth detections, match anchors
@@ -121,9 +120,11 @@ class TargetAssigner(object):
        If set to None, unmatched_cls_target is set to be [0] for each anchor.
      groundtruth_weights: a float tensor of shape [M] indicating the weight to
        assign to all anchors match to a particular groundtruth box. The weights
-        must be in [0., 1.]. If None, all weights are set to 1.
-      **params: Additional keyword arguments for specific implementations of
-        the Matcher.
+        must be in [0., 1.]. If None, all weights are set to 1. Generally no
+        groundtruth boxes with zero weight match to any anchors as matchers are
+        aware of groundtruth weights. Additionally, `cls_weights` and
+        `reg_weights` are calculated using groundtruth weights as an added
+        safety.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
@@ -177,7 +178,8 @@ class TargetAssigner(object):
        [unmatched_shape_assert, labels_and_box_shapes_assert]):
      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                           anchors)
-      match = self._matcher.match(match_quality_matrix, **params)
+      match = self._matcher.match(match_quality_matrix,
+                                  valid_rows=tf.greater(groundtruth_weights, 0))
      reg_targets = self._create_regression_targets(anchors,
                                                    groundtruth_boxes,
                                                    match)
...
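In place of the removed `**params` pass-through, the matcher now receives an explicit `valid_rows` mask derived from `groundtruth_weights`, so zero-weight groundtruth boxes can never produce matches. A minimal sketch of the idea, using made-up toy values and a simplified argmax matcher rather than the library's `Matcher` classes:

import tensorflow as tf

# Toy [num_groundtruth, num_anchors] match quality matrix (e.g. IOU scores);
# all values here are invented for illustration.
match_quality = tf.constant([[0.9, 0.1],
                             [0.8, 0.7],   # this box gets weight 0 below
                             [0.2, 0.6]])
groundtruth_weights = tf.constant([1.0, 0.0, 0.5])

# The new call site computes valid_rows exactly like this.
valid_rows = tf.greater(groundtruth_weights, 0)

# One way a matcher can honor valid_rows: clamp invalid rows to -1 so they
# never win the per-anchor argmax.
masked = tf.where(tf.tile(tf.expand_dims(valid_rows, 1), [1, 2]),
                  match_quality, -tf.ones_like(match_quality))
matches = tf.argmax(masked, axis=0)

with tf.Session() as sess:
  print(sess.run(matches))  # [0 2]: the zero-weight box (row 1) never matches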
@@ -495,8 +495,7 @@ class TargetAssignerTest(test_case.TestCase):
          priors,
          boxes,
          groundtruth_labels,
-          unmatched_class_label=unmatched_class_label,
-          num_valid_rows=3)
+          unmatched_class_label=unmatched_class_label)

  def test_raises_error_on_invalid_groundtruth_labels(self):
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
@@ -520,8 +519,7 @@ class TargetAssignerTest(test_case.TestCase):
          priors,
          boxes,
          groundtruth_labels,
-          unmatched_class_label=unmatched_class_label,
-          num_valid_rows=3)
+          unmatched_class_label=unmatched_class_label)

class BatchTargetAssignerTest(test_case.TestCase):
...
This source diff could not be displayed because it is too large.
@@ -19,9 +19,6 @@ protos for object detection.
 """
 import tensorflow as tf

-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
 from object_detection.core import data_decoder
 from object_detection.core import standard_fields as fields
 from object_detection.protos import input_reader_pb2
@@ -30,14 +27,12 @@ from object_detection.utils import label_map_util
 slim_example_decoder = tf.contrib.slim.tfexample_decoder

-# TODO(lzc): keep LookupTensor and BackupHandler in sync with
-# tf.contrib.slim.tfexample_decoder version.
-class LookupTensor(slim_example_decoder.Tensor):
-  """An ItemHandler that returns a parsed Tensor, the result of a lookup."""
+class _ClassTensorHandler(slim_example_decoder.Tensor):
+  """An ItemHandler to fetch class ids from class text."""

   def __init__(self,
                tensor_key,
-               table,
+               label_map_proto_file,
                shape_keys=None,
                shape=None,
                default_value=''):
@@ -47,7 +42,8 @@ class LookupTensor(slim_example_decoder.Tensor):
     Args:
       tensor_key: the name of the `TFExample` feature to read the tensor from.
-      table: A tf.lookup table.
+      label_map_proto_file: File path to a text format LabelMapProto message
+        mapping class text to id.
       shape_keys: Optional name or list of names of the TF-Example feature in
         which the tensor shape is stored. If a list, then each corresponds to
         one dimension of the shape.
@@ -59,16 +55,39 @@ class LookupTensor(slim_example_decoder.Tensor):
     Raises:
       ValueError: if both `shape_keys` and `shape` are specified.
     """
-    self._table = table
-    super(LookupTensor, self).__init__(tensor_key, shape_keys, shape,
-                                       default_value)
+    name_to_id = label_map_util.get_label_map_dict(
+        label_map_proto_file, use_display_name=False)
+    # We use a default_value of -1, but we expect all labels to be contained
+    # in the label map.
+    name_to_id_table = tf.contrib.lookup.HashTable(
+        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+            keys=tf.constant(list(name_to_id.keys())),
+            values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
+        default_value=-1)
+
+    display_name_to_id = label_map_util.get_label_map_dict(
+        label_map_proto_file, use_display_name=True)
+    # We use a default_value of -1, but we expect all labels to be contained
+    # in the label map.
+    display_name_to_id_table = tf.contrib.lookup.HashTable(
+        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+            keys=tf.constant(list(display_name_to_id.keys())),
+            values=tf.constant(
+                list(display_name_to_id.values()), dtype=tf.int64)),
+        default_value=-1)
+
+    self._name_to_id_table = name_to_id_table
+    self._display_name_to_id_table = display_name_to_id_table
+    super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
+                                              default_value)

   def tensors_to_item(self, keys_to_tensors):
-    unmapped_tensor = super(LookupTensor, self).tensors_to_item(keys_to_tensors)
-    return self._table.lookup(unmapped_tensor)
+    unmapped_tensor = super(_ClassTensorHandler,
+                            self).tensors_to_item(keys_to_tensors)
+    return tf.maximum(self._name_to_id_table.lookup(unmapped_tensor),
+                      self._display_name_to_id_table.lookup(unmapped_tensor))


-class BackupHandler(slim_example_decoder.ItemHandler):
+class _BackupHandler(slim_example_decoder.ItemHandler):
   """An ItemHandler that tries two ItemHandlers in order."""

   def __init__(self, handler, backup):
@@ -92,12 +111,12 @@ class BackupHandler(slim_example_decoder.ItemHandler):
         'Backup handler is of type %s instead of ItemHandler' % type(backup))
     self._handler = handler
     self._backup = backup
-    super(BackupHandler, self).__init__(handler.keys + backup.keys)
+    super(_BackupHandler, self).__init__(handler.keys + backup.keys)

   def tensors_to_item(self, keys_to_tensors):
     item = self._handler.tensors_to_item(keys_to_tensors)
-    return control_flow_ops.cond(
-        pred=math_ops.equal(math_ops.reduce_prod(array_ops.shape(item)), 0),
+    return tf.cond(
+        pred=tf.equal(tf.reduce_prod(tf.shape(item)), 0),
         true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors),
         false_fn=lambda: item)
@@ -140,6 +159,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
         input_reader_pb2.PNG_MASKS.
     """
+    # TODO(rathodv): delete unused `use_display_name` argument once we change
+    # other decoders to handle label maps similarly.
+    del use_display_name
     self.keys_to_features = {
         'image/encoded':
             tf.FixedLenFeature((), tf.string, default_value=''),
@@ -267,27 +289,18 @@ class TfExampleDecoder(data_decoder.DataDecoder):
     else:
       raise ValueError('Did not recognize the `instance_mask_type` option.')
     if label_map_proto_file:
-      label_map = label_map_util.get_label_map_dict(label_map_proto_file,
-                                                    use_display_name)
-      # We use a default_value of -1, but we expect all labels to be contained
-      # in the label map.
-      table = tf.contrib.lookup.HashTable(
-          initializer=tf.contrib.lookup.KeyValueTensorInitializer(
-              keys=tf.constant(list(label_map.keys())),
-              values=tf.constant(list(label_map.values()), dtype=tf.int64)),
-          default_value=-1)
       # If the label_map_proto is provided, try to use it in conjunction with
       # the class text, and fall back to a materialized ID.
-      # TODO(lzc): note that here we are using BackupHandler defined in this
-      # file(which is branching slim_example_decoder.BackupHandler). Need to
-      # switch back to slim_example_decoder.BackupHandler once tf 1.5 becomes
-      # more popular.
-      label_handler = BackupHandler(
-          LookupTensor('image/object/class/text', table, default_value=''),
+      label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              'image/object/class/text', label_map_proto_file,
+              default_value=''),
          slim_example_decoder.Tensor('image/object/class/label'))
-      image_label_handler = BackupHandler(
-          LookupTensor(
-              fields.TfExampleFields.image_class_text, table, default_value=''),
+      image_label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              fields.TfExampleFields.image_class_text,
+              label_map_proto_file,
+              default_value=''),
           slim_example_decoder.Tensor(fields.TfExampleFields.image_class_label))
     else:
       label_handler = slim_example_decoder.Tensor('image/object/class/label')
@@ -309,6 +322,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       A dictionary of the following tensors.
       fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
         containing image.
+      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
+        shape [2] containing shape of the image.
       fields.InputDataFields.source_id - string tensor containing original
         image id.
       fields.InputDataFields.key - string tensor with unique sha256 hash key.
@@ -352,6 +367,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       is_crowd = fields.InputDataFields.groundtruth_is_crowd
       tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
       tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
+      tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
+          tensor_dict[fields.InputDataFields.image])[:2]
       tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
           tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
...
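`_ClassTensorHandler` above builds two lookup tables so `image/object/class/text` can be resolved whether a label map entry carries the string under `name` or `display_name`. Because both tables default to -1 on a miss, an elementwise `tf.maximum` of the two lookups picks out whichever table matched. A standalone sketch of that trick, with invented label values:

import tensorflow as tf

# Hypothetical label map: 'cat' is a `name` entry, 'kitty' a `display_name`
# entry; both tables return -1 for unknown keys.
name_table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(
        keys=tf.constant(['cat']), values=tf.constant([3], dtype=tf.int64)),
    default_value=-1)
display_table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(
        keys=tf.constant(['kitty']), values=tf.constant([3], dtype=tf.int64)),
    default_value=-1)

class_text = tf.constant(['cat', 'kitty', 'dog'])
# Elementwise maximum resolves whichever lookup succeeded.
class_ids = tf.maximum(name_table.lookup(class_text),
                       display_table.lookup(class_text))

with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  print(sess.run(class_ids))  # [ 3  3 -1]  ('dog' is not in the map)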
@@ -12,24 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Tests for object_detection.data_decoders.tf_example_decoder."""

 import os
 import numpy as np
 import tensorflow as tf

-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import parsing_ops
 from object_detection.core import standard_fields as fields
 from object_detection.data_decoders import tf_example_decoder
 from object_detection.protos import input_reader_pb2
+from object_detection.utils import dataset_util

 slim_example_decoder = tf.contrib.slim.tfexample_decoder
@@ -56,25 +49,6 @@ class TfExampleDecoderTest(tf.test.TestCase):
       raise ValueError('Invalid encoding type.')
     return image_decoded

-  def _Int64Feature(self, value):
-    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
-
-  def _FloatFeature(self, value):
-    return tf.train.Feature(float_list=tf.train.FloatList(value=value))
-
-  def _BytesFeature(self, value):
-    if isinstance(value, list):
-      return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
-    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
-  def _Int64FeatureFromList(self, ndarray):
-    return feature_pb2.Feature(
-        int64_list=feature_pb2.Int64List(value=ndarray.flatten().tolist()))
-
-  def _BytesFeatureFromList(self, ndarray):
-    values = ndarray.flatten().tolist()
-    return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=values))
-
   def testDecodeAdditionalChannels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
@@ -88,14 +62,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/additional_channels/encoded':
-                    self._BytesFeatureFromList(
-                        np.array([encoded_additional_channel] * 2)),
+                    dataset_util.bytes_list_feature(
+                        [encoded_additional_channel] * 2),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/source_id':
-                    self._BytesFeature('image_id'),
+                    dataset_util.bytes_feature('image_id'),
             })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -108,118 +82,44 @@ class TfExampleDecoderTest(tf.test.TestCase):
           np.concatenate([decoded_additional_channel] * 2, axis=2),
           tensor_dict[fields.InputDataFields.image_additional_channels])

-  def testDecodeExampleWithBranchedBackupHandler(self):
-    example1 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-                'image/object/class/label':
-                    self._Int64FeatureFromList(np.array([42, 10, 900]))
-            }))
-    example2 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-            }))
-    example3 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/label':
-                    self._Int64FeatureFromList(np.array([42, 10, 901]))
-            }))
-    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
-    table = lookup_ops.index_table_from_tensor(
-        constant_op.constant(['dog', 'guinea pig', 'cat']))
-    keys_to_features = {
-        'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
-        'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
-    }
-    backup_handler = tf_example_decoder.BackupHandler(
-        handler=slim_example_decoder.Tensor('image/object/class/label'),
-        backup=tf_example_decoder.LookupTensor('image/object/class/text',
-                                               table))
-    items_to_handlers = {
-        'labels': backup_handler,
-    }
-    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
-                                                    items_to_handlers)
-    obtained_class_ids_each_example = []
-    with self.test_session() as sess:
-      sess.run(lookup_ops.tables_initializer())
-      for example in [example1, example2, example3]:
-        serialized_example = array_ops.reshape(
-            example.SerializeToString(), shape=[])
-        obtained_class_ids_each_example.append(
-            decoder.decode(serialized_example)[0].eval())
-
-    self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
-    self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
-    self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])
-
-  def testDecodeExampleWithBranchedLookup(self):
-    example = example_pb2.Example(features=feature_pb2.Features(feature={
-        'image/object/class/text': self._BytesFeatureFromList(
-            np.array(['cat', 'dog', 'guinea pig'])),
-    }))
-    serialized_example = example.SerializeToString()
-    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
-    table = lookup_ops.index_table_from_tensor(
-        constant_op.constant(['dog', 'guinea pig', 'cat']))
-    with self.test_session() as sess:
-      sess.run(lookup_ops.tables_initializer())
-      serialized_example = array_ops.reshape(serialized_example, shape=[])
-      keys_to_features = {
-          'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
-      }
-      items_to_handlers = {
-          'labels':
-              tf_example_decoder.LookupTensor('image/object/class/text', table),
-      }
-      decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
-                                                      items_to_handlers)
-      obtained_class_ids = decoder.decode(serialized_example)[0].eval()
-
-    self.assertAllClose([2, 0, 1], obtained_class_ids)
-
   def testDecodeJpegImage(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     decoded_jpeg = self._DecodeImage(encoded_jpeg)
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/source_id': self._BytesFeature('image_id'),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/source_id': dataset_util.bytes_feature('image_id'),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
                          get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                         original_image_spatial_shape].
+                         get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
+      self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                              original_image_spatial_shape])
       self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])

   def testDecodeImageKeyAndFilename(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/key/sha256': self._BytesFeature('abc'),
-        'image/filename': self._BytesFeature('filename')
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/key/sha256': dataset_util.bytes_feature('abc'),
+                'image/filename': dataset_util.bytes_feature('filename')
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
@@ -234,21 +134,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
     decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_png),
-        'image/format': self._BytesFeature('png'),
-        'image/source_id': self._BytesFeature('image_id')
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_png),
+                'image/format': dataset_util.bytes_feature('png'),
+                'image/source_id': dataset_util.bytes_feature('image_id')
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
                          get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                         original_image_spatial_shape].
+                         get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
+      self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                              original_image_spatial_shape])
       self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])

   def testDecodePngInstanceMasks(self):
@@ -265,9 +172,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/mask': self._BytesFeature(encoded_masks)
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/mask':
+                    dataset_util.bytes_list_feature(encoded_masks)
             })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -288,11 +198,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/mask': self._BytesFeature(encoded_masks),
-                'image/height': self._Int64Feature([10]),
-                'image/width': self._Int64Feature([10]),
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/mask':
+                    dataset_util.bytes_list_feature(encoded_masks),
+                'image/height':
+                    dataset_util.int64_feature(10),
+                'image/width':
+                    dataset_util.int64_feature(10),
             })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -312,25 +227,33 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmins = [1.0, 5.0]
     bbox_ymaxs = [2.0, 6.0]
     bbox_xmaxs = [3.0, 7.0]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
-                         get_shape().as_list()), [None, 4])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+                         .get_shape().as_list()), [None, 4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
-                                bbox_ymaxs, bbox_xmaxs]).transpose()
+    expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
+                                bbox_xmaxs]).transpose()
     self.assertAllEqual(expected_boxes,
                         tensor_dict[fields.InputDataFields.groundtruth_boxes])
     self.assertAllEqual(
@@ -346,30 +269,40 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmaxs = [3.0, 7.0]
     keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
     keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-        'image/object/keypoint/y': self._FloatFeature(keypoint_ys),
-        'image/object/keypoint/x': self._FloatFeature(keypoint_xs),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+                'image/object/keypoint/y':
+                    dataset_util.float_list_feature(keypoint_ys),
+                'image/object/keypoint/x':
+                    dataset_util.float_list_feature(keypoint_xs),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
-                         get_shape().as_list()), [None, 4])
-    self.assertAllEqual((tensor_dict[fields.InputDataFields.
-                         groundtruth_keypoints].
-                         get_shape().as_list()), [2, 3, 2])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+                         .get_shape().as_list()), [None, 4])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape()
+         .as_list()), [2, 3, 2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
-                                bbox_ymaxs, bbox_xmaxs]).transpose()
+    expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
+                                bbox_xmaxs]).transpose()
     self.assertAllEqual(expected_boxes,
                         tensor_dict[fields.InputDataFields.groundtruth_boxes])
     self.assertAllEqual(
@@ -377,9 +310,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
     expected_keypoints = (
         np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
-    self.assertAllEqual(expected_keypoints,
-                        tensor_dict[
-                            fields.InputDataFields.groundtruth_keypoints])
+    self.assertAllEqual(
+        expected_keypoints,
+        tensor_dict[fields.InputDataFields.groundtruth_keypoints])

   def testDecodeDefaultGroundtruthWeights(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -388,20 +321,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmins = [1.0, 5.0]
     bbox_ymaxs = [2.0, 6.0]
     bbox_xmaxs = [3.0, 7.0]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
-                         get_shape().as_list()), [None, 4])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+                         .get_shape().as_list()), [None, 4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -414,18 +355,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes = [0, 1]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/class/label': self._Int64Feature(bbox_classes),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(bbox_classes),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
-                        [2])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -437,11 +382,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes = [1, 2]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/class/label': self._Int64Feature(bbox_classes),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(bbox_classes),
+            })).SerializeToString()
     label_map_string = """
       item {
        id:1
@@ -460,9 +410,8 @@ class TfExampleDecoderTest(tf.test.TestCase):
         label_map_proto_file=label_map_path)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
-                        [None])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [None])
     init = tf.tables_initializer()
     with self.test_session() as sess:
@@ -480,11 +429,11 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/object/class/text':
-                    self._BytesFeature(bbox_classes_text),
+                    dataset_util.bytes_list_feature(bbox_classes_text),
             })).SerializeToString()

     label_map_string = """
@@ -514,7 +463,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual([2, -1],
                         tensor_dict[fields.InputDataFields.groundtruth_classes])

-  def testDecodeObjectLabelWithMapping(self):
+  def testDecodeObjectLabelWithMappingWithDisplayName(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes_text = ['cat', 'dog']
@@ -522,11 +471,53 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/object/class/text':
-                    self._BytesFeature(bbox_classes_text),
+                    dataset_util.bytes_list_feature(bbox_classes_text),
+            })).SerializeToString()
+
+    label_map_string = """
+      item {
+        id:3
+        display_name:'cat'
+      }
+      item {
+        id:1
+        display_name:'dog'
+      }
+    """
+    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+    with tf.gfile.Open(label_map_path, 'wb') as f:
+      f.write(label_map_string)
+
+    example_decoder = tf_example_decoder.TfExampleDecoder(
+        label_map_proto_file=label_map_path)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [None])
+
+    with self.test_session() as sess:
+      sess.run(tf.tables_initializer())
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual([3, 1],
+                        tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+  def testDecodeObjectLabelWithMappingWithName(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_classes_text = ['cat', 'dog']
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/class/text':
+                    dataset_util.bytes_list_feature(bbox_classes_text),
             })).SerializeToString()

     label_map_string = """
@@ -561,17 +552,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_area = [100., 174.]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/area': self._FloatFeature(object_area),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/area':
+                    dataset_util.float_list_feature(object_area),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
-                         get_shape().as_list()), [2])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area]
+                         .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -583,67 +579,81 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_is_crowd = [0, 1]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/is_crowd': self._Int64Feature(object_is_crowd),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/is_crowd':
+                    dataset_util.int64_list_feature(object_is_crowd),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
-                        [2])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_is_crowd].get_shape()
+         .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    self.assertAllEqual([bool(item) for item in object_is_crowd],
-                        tensor_dict[
-                            fields.InputDataFields.groundtruth_is_crowd])
+    self.assertAllEqual(
+        [bool(item) for item in object_is_crowd],
+        tensor_dict[fields.InputDataFields.groundtruth_is_crowd])

   @test_util.enable_c_shapes
   def testDecodeObjectDifficult(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_difficult = [0, 1]
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/object/difficult': self._Int64Feature(object_difficult),
-    })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/difficult':
+                    dataset_util.int64_list_feature(object_difficult),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
-                        [2])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_difficult].get_shape()
+         .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    self.assertAllEqual([bool(item) for item in object_difficult],
-                        tensor_dict[
-                            fields.InputDataFields.groundtruth_difficult])
+    self.assertAllEqual(
+        [bool(item) for item in object_difficult],
+        tensor_dict[fields.InputDataFields.groundtruth_difficult])

   @test_util.enable_c_shapes
   def testDecodeObjectGroupOf(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_group_of = [0, 1]
-    example = tf.train.Example(features=tf.train.Features(
-        feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/group_of': self._Int64Feature(object_group_of),
-        })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/group_of':
+                    dataset_util.int64_list_feature(object_group_of),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_group_of].get_shape().as_list()),
-                        [2])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape()
+         .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -655,25 +665,27 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_weights = [0.75, 1.0]
-    example = tf.train.Example(features=tf.train.Features(
-        feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/weight': self._FloatFeature(object_weights),
-        })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/weight':
+                    dataset_util.float_list_feature(object_weights),
+            })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_weights].get_shape().as_list()),
-                        [None])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_weights]
+                         .get_shape().as_list()), [None])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

-    self.assertAllEqual(
-        object_weights,
-        tensor_dict[fields.InputDataFields.groundtruth_weights])
+    self.assertAllEqual(object_weights,
+                        tensor_dict[fields.InputDataFields.groundtruth_weights])

   @test_util.enable_c_shapes
   def testDecodeInstanceSegmentation(self):
@@ -682,15 +694,13 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_width = 3

     # Randomly generate image.
-    image_tensor = np.random.randint(256, size=(image_height,
-                                                image_width,
-                                                3)).astype(np.uint8)
+    image_tensor = np.random.randint(
+        256, size=(image_height, image_width, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)

     # Randomly generate instance segmentation masks.
     instance_masks = (
-        np.random.randint(2, size=(num_instances,
-                                   image_height,
+        np.random.randint(2, size=(num_instances, image_height,
                                    image_width)).astype(np.float32))
     instance_masks_flattened = np.reshape(instance_masks, [-1])
@@ -698,25 +708,32 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_classes = np.random.randint(
         100, size=(num_instances)).astype(np.int64)

-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/height': self._Int64Feature([image_height]),
-        'image/width': self._Int64Feature([image_width]),
-        'image/object/mask': self._FloatFeature(instance_masks_flattened),
-        'image/object/class/label': self._Int64Feature(
-            object_classes)})).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/height':
+                    dataset_util.int64_feature(image_height),
+                'image/width':
+                    dataset_util.int64_feature(image_width),
+                'image/object/mask':
+                    dataset_util.float_list_feature(instance_masks_flattened),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(object_classes)
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(
         load_instance_masks=True)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((
-        tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
-        get_shape().as_list()), [4, 5, 3])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+         .get_shape().as_list()), [4, 5, 3])

-    self.assertAllEqual((
-        tensor_dict[fields.InputDataFields.groundtruth_classes].
-        get_shape().as_list()), [4])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [4])

     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -724,24 +741,21 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual(
         instance_masks.astype(np.float32),
         tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
-    self.assertAllEqual(
-        object_classes,
-        tensor_dict[fields.InputDataFields.groundtruth_classes])
+    self.assertAllEqual(object_classes,
+                        tensor_dict[fields.InputDataFields.groundtruth_classes])

   def testInstancesNotAvailableByDefault(self):
     num_instances = 4
     image_height = 5
     image_width = 3
     # Randomly generate image.
-    image_tensor = np.random.randint(256, size=(image_height,
-                                                image_width,
-                                                3)).astype(np.uint8)
+    image_tensor = np.random.randint(
+        256, size=(image_height, image_width, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)

     # Randomly generate instance segmentation masks.
     instance_masks = (
-        np.random.randint(2, size=(num_instances,
-                                   image_height,
+        np.random.randint(2, size=(num_instances, image_height,
                                    image_width)).astype(np.float32))
     instance_masks_flattened = np.reshape(instance_masks, [-1])
@@ -749,18 +763,26 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_classes = np.random.randint(
         100, size=(num_instances)).astype(np.int64)

-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/height': self._Int64Feature([image_height]),
-        'image/width': self._Int64Feature([image_width]),
-        'image/object/mask': self._FloatFeature(instance_masks_flattened),
-        'image/object/class/label': self._Int64Feature(
-            object_classes)})).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/height':
+                    dataset_util.int64_feature(image_height),
+                'image/width':
+                    dataset_util.int64_feature(image_width),
+                'image/object/mask':
+                    dataset_util.float_list_feature(instance_masks_flattened),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(object_classes)
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertTrue(fields.InputDataFields.groundtruth_instance_masks
-                    not in tensor_dict)
+    self.assertTrue(
+        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)

   def testDecodeImageLabels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -768,9 +790,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/class/label': self._Int64Feature([1, 2]),
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/class/label': dataset_util.int64_list_feature([1, 2]),
             })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
@@ -784,9 +806,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/class/text': self._BytesFeature(['dog', 'cat']),
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/class/text':
+                    dataset_util.bytes_list_feature(['dog', 'cat']),
             })).SerializeToString()
     label_map_string = """
       item {
...
...@@ -177,8 +177,8 @@ def create_tf_example(image,
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/text':
          dataset_util.bytes_list_feature(category_names),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
......
...@@ -106,6 +106,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/class/text'].bytes_list.value,
        ['cat'])

  def test_create_tf_example_with_instance_masks(self):
    image_file_name = 'tmp_image.jpg'
...@@ -169,6 +172,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [1])
    self._assertProtoEqual(
        example.features.feature['image/object/class/text'].bytes_list.value,
        ['dog'])
    encoded_mask_pngs = [
        io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
            'image/object/mask'].bytes_list.value
......
...@@ -14,7 +14,6 @@
# ==============================================================================
"""Common utility functions for evaluation."""
import collections
import os
import time
...@@ -53,15 +52,15 @@ def write_metrics(metrics, global_step, summary_dir):
    global_step: Global step at which the metrics are computed.
    summary_dir: Directory to write tensorflow summaries to.
  """
  tf.logging.info('Writing metrics to tf summary.')
  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
  for key in sorted(metrics):
    summary = tf.Summary(value=[
        tf.Summary.Value(tag=key, simple_value=metrics[key]),
    ])
    summary_writer.add_summary(summary, global_step)
    tf.logging.info('%s: %f', key, metrics[key])
  tf.logging.info('Metrics written to tf summary.')
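For reference, a minimal usage sketch of `write_metrics`; the metric names, step, and directory here are hypothetical:

```python
# Hypothetical metrics; any {name: scalar} mapping works.
metrics = {'DetectionBoxes_Precision/mAP': 0.31, 'Loss/total_loss': 1.7}
write_metrics(metrics, global_step=20000, summary_dir='/tmp/eval_summaries')
```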
# TODO(rathodv): Add tests.
...@@ -141,7 +140,7 @@ def visualize_detection_results(result_dict,
  if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
    raise ValueError('If show_groundtruth is enabled, result_dict must contain '
                     'groundtruth_boxes.')
  tf.logging.info('Creating detection visualizations.')
  category_index = label_map_util.create_category_index(categories)

  image = np.squeeze(result_dict[input_fields.original_image], axis=0)
...@@ -205,7 +204,8 @@ def visualize_detection_results(result_dict,
  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
  summary_writer.add_summary(summary, global_step)
  tf.logging.info('Detection visualizations written to summary with tag %s.',
                  tag)
def _run_checkpoint_once(tensor_dict,
...@@ -218,7 +218,8 @@ def _run_checkpoint_once(tensor_dict,
                         master='',
                         save_graph=False,
                         save_graph_dir='',
                         losses_dict=None,
                         eval_export_path=None):
  """Evaluates metrics defined in evaluators and returns summaries.

  This function loads the latest checkpoint in checkpoint_dirs and evaluates
...@@ -258,6 +259,8 @@ def _run_checkpoint_once(tensor_dict,
    save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
      is True this must be non-empty.
    losses_dict: optional dictionary of scalar detection losses.
    eval_export_path: Path for saving a JSON file that contains the detection
      results.

  Returns:
    global_step: the count of global steps.
...@@ -292,7 +295,8 @@ def _run_checkpoint_once(tensor_dict,
    try:
      for batch in range(int(num_batches)):
        if (batch + 1) % 100 == 0:
          tf.logging.info('Running eval ops batch %d/%d', batch + 1,
                          num_batches)
        if not batch_processor:
          try:
            if not losses_dict:
...@@ -301,7 +305,7 @@ def _run_checkpoint_once(tensor_dict,
                losses_dict])
            counters['success'] += 1
          except tf.errors.InvalidArgumentError:
            tf.logging.info('Skipping image')
            counters['skipped'] += 1
            result_dict = {}
        else:
...@@ -316,18 +320,31 @@ def _run_checkpoint_once(tensor_dict,
          # decoders to return correct image_id.
          # TODO(akuznetsa): result_dict contains batches of images, while
          # add_single_ground_truth_image_info expects a single image. Fix
          if (isinstance(result_dict, dict) and
              result_dict[fields.InputDataFields.key]):
            image_id = result_dict[fields.InputDataFields.key]
          else:
            image_id = batch
          evaluator.add_single_ground_truth_image_info(
              image_id=image_id, groundtruth_dict=result_dict)
          evaluator.add_single_detected_image_info(
              image_id=image_id, detections_dict=result_dict)
      tf.logging.info('Running eval batches done.')
    except tf.errors.OutOfRangeError:
      tf.logging.info('Done evaluating -- epoch limit reached')
    finally:
      # When done, ask the threads to stop.
      tf.logging.info('# success: %d', counters['success'])
      tf.logging.info('# skipped: %d', counters['skipped'])
      all_evaluator_metrics = {}
      if eval_export_path and eval_export_path is not None:
        for evaluator in evaluators:
          if (isinstance(evaluator, coco_evaluation.CocoDetectionEvaluator) or
              isinstance(evaluator, coco_evaluation.CocoMaskEvaluator)):
            tf.logging.info('Started dumping to json file.')
            evaluator.dump_detections_to_json_file(
                json_output_path=eval_export_path)
            tf.logging.info('Finished dumping to json file.')
      for evaluator in evaluators:
        metrics = evaluator.evaluate()
        evaluator.clear()
...@@ -356,7 +373,8 @@ def repeated_checkpoint_run(tensor_dict,
                            master='',
                            save_graph=False,
                            save_graph_dir='',
                            losses_dict=None,
                            eval_export_path=None):
  """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.

  This function repeatedly loads a checkpoint and evaluates a desired
...@@ -397,6 +415,8 @@ def repeated_checkpoint_run(tensor_dict,
    save_graph_dir: where to save on disk the Tensorflow graph. If store_graph
      is True this must be non-empty.
    losses_dict: optional dictionary of scalar detection losses.
    eval_export_path: Path for saving a JSON file that contains the detection
      results.

  Returns:
    metrics: A dictionary containing metric names and values in the latest
...@@ -417,31 +437,36 @@ def repeated_checkpoint_run(tensor_dict,
  number_of_evaluations = 0
  while True:
    start = time.time()
    tf.logging.info('Starting evaluation at ' + time.strftime(
        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
    model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
    if not model_path:
      tf.logging.info('No model found in %s. Will try again in %d seconds',
                      checkpoint_dirs[0], eval_interval_secs)
    elif model_path == last_evaluated_model_path:
      tf.logging.info('Found already evaluated checkpoint. Will try again in '
                      '%d seconds', eval_interval_secs)
    else:
      last_evaluated_model_path = model_path
      global_step, metrics = _run_checkpoint_once(
          tensor_dict,
          evaluators,
          batch_processor,
          checkpoint_dirs,
          variables_to_restore,
          restore_fn,
          num_batches,
          master,
          save_graph,
          save_graph_dir,
          losses_dict=losses_dict,
          eval_export_path=eval_export_path)
      write_metrics(metrics, global_step, summary_dir)
      number_of_evaluations += 1

      if (max_number_of_evaluations and
          number_of_evaluations >= max_number_of_evaluations):
        tf.logging.info('Finished evaluation!')
        break
    time_to_next_eval = start + eval_interval_secs - time.time()
    if time_to_next_eval > 0:
...@@ -680,4 +705,3 @@ def evaluator_options_from_eval_config(eval_config):
          eval_config.include_metrics_per_category)
  }
  return evaluator_options
...@@ -2,13 +2,12 @@
We provide a collection of detection models pre-trained on the [COCO
dataset](http://mscoco.org), the [Kitti dataset](http://www.cvlibs.net/datasets/kitti/),
the [Open Images dataset](https://github.com/openimages/dataset), the
[AVA v2.1 dataset](https://research.google.com/ava/) and the
[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
These models can be useful for out-of-the-box inference if you are interested in
categories already in those datasets. They are also useful for initializing your
models when training on novel datasets.

In the table below, we list each such pre-trained model including:
...@@ -113,6 +112,13 @@ Model name
[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes
## iNaturalist Species-trained models
Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---: | :-------------: | :-----:
[faster_rcnn_resnet101_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_fgvc_2018_07_19.tar.gz) | 395 | 58 | Boxes
[faster_rcnn_resnet50_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_fgvc_2018_07_19.tar.gz) | 366 | 55 | Boxes
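A model from the tables above can be fetched and unpacked as follows (a minimal sketch using only the standard library; the URL is taken from the iNaturalist table, and each tarball is expected to contain a frozen inference graph and checkpoint files):

```python
import tarfile
import urllib.request

MODEL = 'faster_rcnn_resnet101_fgvc_2018_07_19'
URL = ('http://download.tensorflow.org/models/object_detection/'
       '%s.tar.gz' % MODEL)

# Download the archive and extract it into the current directory.
urllib.request.urlretrieve(URL, MODEL + '.tar.gz')
with tarfile.open(MODEL + '.tar.gz') as tar:
  tar.extractall()
```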
## AVA v2.1 trained models
......
...@@ -37,12 +37,12 @@ A local training job can be run with the following command:
PIPELINE_CONFIG_PATH={path to pipeline config file}
MODEL_DIR={path to model directory}
NUM_TRAIN_STEPS=50000
SAMPLE_1_OF_N_EVAL_EXAMPLES=1
python object_detection/model_main.py \
    --pipeline_config_path=${PIPELINE_CONFIG_PATH} \
    --model_dir=${MODEL_DIR} \
    --num_train_steps=${NUM_TRAIN_STEPS} \
    --sample_1_of_n_eval_examples=$SAMPLE_1_OF_N_EVAL_EXAMPLES \
    --alsologtostderr
```
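Here `SAMPLE_1_OF_N_EVAL_EXAMPLES=1` evaluates every example in the eval set; a value of `n` samples one of every n eval examples, trading evaluation coverage for speed.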
......
...@@ -216,7 +216,7 @@ To start training and evaluation, execute the following command from the
```bash
# From tensorflow/models/research/
gcloud ml-engine jobs submit training `whoami`_object_detection_pets_`date +%m_%d_%Y_%H_%M_%S` \
    --runtime-version 1.8 \
    --job-dir=gs://${YOUR_GCS_BUCKET}/model_dir \
    --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz,/tmp/pycocotools/pycocotools-2.0.tar.gz \
    --module-name object_detection.model_main \
......
...@@ -52,7 +52,8 @@ def transform_input_data(tensor_dict,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_bfloat16=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
...@@ -86,6 +87,7 @@ def transform_input_data(tensor_dict,
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
...@@ -101,7 +103,8 @@ def transform_input_data(tensor_dict,
  if retain_original_image:
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
        tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
...@@ -111,6 +114,9 @@ def transform_input_data(tensor_dict,
  image = tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.to_float(image), axis=0))
  if use_bfloat16:
    preprocessed_resized_image = tf.cast(
        preprocessed_resized_image, tf.bfloat16)
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
...@@ -128,13 +134,33 @@ def transform_input_data(tensor_dict,
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)

  if fields.InputDataFields.groundtruth_confidences in tensor_dict:
    groundtruth_confidences = tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.sparse_to_dense(
            zero_indexed_groundtruth_classes,
            [num_classes],
            groundtruth_confidences,
            validate_indices=False))
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  return tensor_dict
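As a worked illustration of the confidences branch above (hypothetical values): `tf.sparse_to_dense` scatters each box's confidence into its class slot of a length-`num_classes` vector, and `validate_indices=False` tolerates the unsorted class indices.

```python
import tensorflow as tf

# Hypothetical example: 3 classes, two boxes labeled [2, 0] with
# per-box confidences [0.8, 0.6].
zero_indexed_groundtruth_classes = tf.constant([2, 0], dtype=tf.int32)
groundtruth_confidences = tf.constant([0.8, 0.6], dtype=tf.float32)

dense_confidences = tf.sparse_to_dense(
    zero_indexed_groundtruth_classes, [3], groundtruth_confidences,
    validate_indices=False)

with tf.Session() as sess:
  print(sess.run(dense_confidences))  # [0.6 0.  0.8]
```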
...@@ -174,6 +200,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
      fields.InputDataFields.image: [
          height, width, 3 + num_additional_channels
      ],
      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels
      ],
...@@ -183,6 +210,8 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_confidences: [
          max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width
      ],
...@@ -198,11 +227,12 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
          max_num_boxes, num_classes + 1 if num_classes is not None else None
      ],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
        height, width, 3 + num_additional_channels
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
...@@ -252,9 +282,12 @@ def augment_input_data(tensor_dict, data_augmentation_options):
                           in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  include_label_scores = (fields.InputDataFields.groundtruth_confidences in
                          tensor_dict)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
          include_label_scores=include_label_scores,
          include_instance_masks=include_instance_masks,
          include_keypoints=include_keypoints))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
...@@ -275,6 +308,7 @@ def _get_labels_dict(input_dict):
    labels_dict[key] = input_dict[key]
  optional_label_keys = [
      fields.InputDataFields.groundtruth_confidences,
      fields.InputDataFields.groundtruth_keypoints,
      fields.InputDataFields.groundtruth_instance_masks,
      fields.InputDataFields.groundtruth_area,
...@@ -291,16 +325,50 @@ def _get_labels_dict(input_dict):
  return labels_dict
def _replace_empty_string_with_random_number(string_tensor):
"""Returns string unchanged if non-empty, and random string tensor otherwise.
  The random string is an integer between 0 and 2**63 - 1, cast as a string.
Args:
string_tensor: A tf.tensor of dtype string.
Returns:
out_string: A tf.tensor of dtype string. If string_tensor contains the empty
      string, out_string will contain a random integer cast to a string.
Otherwise string_tensor is returned unchanged.
"""
empty_string = tf.constant('', dtype=tf.string, name='EmptyString')
random_source_id = tf.as_string(
tf.random_uniform(shape=[], maxval=2**63 - 1, dtype=tf.int64))
out_string = tf.cond(
tf.equal(string_tensor, empty_string),
true_fn=lambda: random_source_id,
false_fn=lambda: string_tensor)
return out_string
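A minimal sketch of this helper's behavior, using the TF 1.x session API and mirroring the tests further below:

```python
import tensorflow as tf

# Scalar string input, as fed by the tests.
string_ph = tf.placeholder(tf.string, shape=[])
out = _replace_empty_string_with_random_number(string_ph)

with tf.Session() as sess:
  # Non-empty strings pass through unchanged.
  print(sess.run(out, feed_dict={string_ph: 'image_42'}))  # b'image_42'
  # Empty strings are replaced by a random integer rendered as a string.
  print(sess.run(out, feed_dict={string_ph: ''}))  # e.g. b'2798129067578209328'
```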
def _get_features_dict(input_dict):
  """Extracts features dict from input dict."""
  source_id = _replace_empty_string_with_random_number(
      input_dict[fields.InputDataFields.source_id])
  hash_from_source_id = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)
  features = {
      fields.InputDataFields.image:
          input_dict[fields.InputDataFields.image],
      HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
      fields.InputDataFields.true_image_shape:
          input_dict[fields.InputDataFields.true_image_shape],
      fields.InputDataFields.original_image_spatial_shape:
          input_dict[fields.InputDataFields.original_image_spatial_shape]
  }
  if fields.InputDataFields.original_image in input_dict:
    features[fields.InputDataFields.original_image] = input_dict[
...@@ -392,7 +460,8 @@ def create_train_input_fn(train_config, train_input_config,
        num_classes=config_util.get_number_of_classes(model_config),
        data_augmentation_fn=data_augmentation_fn,
        merge_multiple_boxes=train_config.merge_multiple_label_boxes,
        retain_original_image=train_config.retain_original_images,
        use_bfloat16=train_config.use_bfloat16)

    tensor_dict = pad_input_data_to_static_shapes(
        tensor_dict=transform_data_fn(tensor_dict),
...@@ -414,8 +483,6 @@ def create_train_input_fn(train_config, train_input_config,
def create_eval_input_fn(eval_config, eval_input_config, model_config):
  """Creates an eval `input` function for `Estimator`.

  Args:
    eval_config: An eval_pb2.EvalConfig.
    eval_input_config: An input_reader_pb2.InputReader.
...@@ -497,7 +564,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
    return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict))

  dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
      eval_input_config,
      batch_size=params['batch_size'] if params else eval_config.batch_size,
      transform_input_data_fn=transform_and_pad_input_data_fn)
  return dataset
......
...@@ -20,6 +20,7 @@ from __future__ import print_function
import functools
import os

from absl.testing import parameterized
import numpy as np
import tensorflow as tf
...@@ -28,6 +29,7 @@ from object_detection import inputs
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
from object_detection.utils import test_case

FLAGS = tf.flags.FLAGS
...@@ -41,11 +43,13 @@ def _get_configs_for_model(model_name):
  data_path = os.path.join(tf.resource_loader.get_data_files_path(),
                           'test_data/pets_examples.record')
  configs = config_util.get_configs_from_pipeline_file(fname)
  override_dict = {
      'train_input_path': data_path,
      'eval_input_path': data_path,
      'label_map_path': label_map_path
  }
  return config_util.merge_external_params_with_configs(
      configs, kwargs_dict=override_dict)


def _make_initializable_iterator(dataset):
...@@ -62,7 +66,7 @@ def _make_initializable_iterator(dataset):
  return iterator


class InputsTest(test_case.TestCase, parameterized.TestCase):

  def test_faster_rcnn_resnet50_train_input(self):
    """Tests the training input function for FasterRcnnResnet50."""
...@@ -89,52 +93,71 @@ class InputsTest(tf.test.TestCase):
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
        [1, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [1, 100],
        labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_weights].dtype)
  @parameterized.parameters(
      {'eval_batch_size': 1},
      {'eval_batch_size': 8}
  )
  def test_faster_rcnn_resnet50_eval_input(self, eval_batch_size=1):
    """Tests the eval input function for FasterRcnnResnet50."""
    configs = _get_configs_for_model('faster_rcnn_resnet50_pets')
    model_config = configs['model']
    model_config.faster_rcnn.num_classes = 37
    eval_config = configs['eval_config']
    eval_config.batch_size = eval_batch_size
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config, configs['eval_input_configs'][0], model_config)
    features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
    self.assertAllEqual([eval_batch_size, None, None, 3],
                        features[fields.InputDataFields.image].shape.as_list())
    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
    self.assertAllEqual(
        [eval_batch_size, None, None, 3],
        features[fields.InputDataFields.original_image].shape.as_list())
    self.assertEqual(tf.uint8,
                     features[fields.InputDataFields.original_image].dtype)
    self.assertAllEqual([eval_batch_size],
                        features[inputs.HASH_KEY].shape.as_list())
    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, 4],
        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_area].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_area].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
    self.assertEqual(
        tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
    self.assertEqual(
        tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
...@@ -170,52 +193,73 @@ class InputsTest(tf.test.TestCase):
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
        [batch_size, 100, model_config.ssd.num_classes],
        labels[
            fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [batch_size, 100],
        labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_weights].dtype)
  @parameterized.parameters(
      {'eval_batch_size': 1},
      {'eval_batch_size': 8}
  )
  def test_ssd_inceptionV2_eval_input(self, eval_batch_size=1):
    """Tests the eval input function for SSDInceptionV2."""
    configs = _get_configs_for_model('ssd_inception_v2_pets')
    model_config = configs['model']
    model_config.ssd.num_classes = 37
    eval_config = configs['eval_config']
    eval_config.batch_size = eval_batch_size
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config, configs['eval_input_configs'][0], model_config)
    features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
    self.assertAllEqual([eval_batch_size, 300, 300, 3],
                        features[fields.InputDataFields.image].shape.as_list())
    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
    self.assertAllEqual(
        [eval_batch_size, 300, 300, 3],
        features[fields.InputDataFields.original_image].shape.as_list())
    self.assertEqual(tf.uint8,
                     features[fields.InputDataFields.original_image].dtype)
    self.assertAllEqual([eval_batch_size],
                        features[inputs.HASH_KEY].shape.as_list())
    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, 4],
        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, model_config.ssd.num_classes],
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100, model_config.ssd.num_classes],
        labels[
            fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_area].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_area].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
    self.assertEqual(
        tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
    self.assertAllEqual(
        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
    self.assertEqual(
        tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
...@@ -225,7 +269,7 @@ class InputsTest(tf.test.TestCase):
    configs = _get_configs_for_model('ssd_inception_v2_pets')
    predict_input_fn = inputs.create_predict_input_fn(
        model_config=configs['model'],
        predict_input_config=configs['eval_input_configs'][0])
    serving_input_receiver = predict_input_fn()

    image = serving_input_receiver.features[fields.InputDataFields.image]
...@@ -238,10 +282,10 @@ class InputsTest(tf.test.TestCase):
  def test_predict_input_with_additional_channels(self):
    """Tests the predict input function with additional channels."""
    configs = _get_configs_for_model('ssd_inception_v2_pets')
    configs['eval_input_configs'][0].num_additional_channels = 2
    predict_input_fn = inputs.create_predict_input_fn(
        model_config=configs['model'],
        predict_input_config=configs['eval_input_configs'][0])
    serving_input_receiver = predict_input_fn()

    image = serving_input_receiver.features[fields.InputDataFields.image]
...@@ -291,7 +335,7 @@ class InputsTest(tf.test.TestCase):
    configs['model'].ssd.num_classes = 37
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config=configs['train_config'],  # Expecting `EvalConfig`.
        eval_input_config=configs['eval_input_configs'][0],
        model_config=configs['model'])
    with self.assertRaises(TypeError):
      eval_input_fn()
...@@ -313,13 +357,45 @@ class InputsTest(tf.test.TestCase):
    configs['model'].ssd.num_classes = 37
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config=configs['eval_config'],
        eval_input_config=configs['eval_input_configs'][0],
        model_config=configs['eval_config'])  # Expecting `DetectionModel`.
    with self.assertRaises(TypeError):
      eval_input_fn()
def test_output_equal_in_replace_empty_string_with_random_number(self):
string_placeholder = tf.placeholder(tf.string, shape=[])
replaced_string = inputs._replace_empty_string_with_random_number(
string_placeholder)
test_string = 'hello world'
feed_dict = {string_placeholder: test_string}
with self.test_session() as sess:
out_string = sess.run(replaced_string, feed_dict=feed_dict)
self.assertEqual(test_string, out_string)
def test_output_is_integer_in_replace_empty_string_with_random_number(self):
string_placeholder = tf.placeholder(tf.string, shape=[])
replaced_string = inputs._replace_empty_string_with_random_number(
string_placeholder)
empty_string = ''
feed_dict = {string_placeholder: empty_string}
tf.set_random_seed(0)
with self.test_session() as sess:
out_string = sess.run(replaced_string, feed_dict=feed_dict)
# Test whether out_string is a string which represents an integer.
int(out_string) # throws an error if out_string is not castable to int.
self.assertEqual(out_string, '2798129067578209328')
class DataAugmentationFnTest(test_case.TestCase):
  def test_apply_image_and_box_augmentation(self):
    data_augmentation_options = [
...@@ -352,6 +428,50 @@ class DataAugmentationFnTest(tf.test.TestCase):
        [[10, 10, 20, 20]]
    )
def test_apply_image_and_box_augmentation_with_scores(self):
data_augmentation_options = [
(preprocessor.resize_image, {
'new_height': 20,
'new_width': 20,
'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR
}),
(preprocessor.scale_boxes_to_pixel_coordinates, {}),
]
data_augmentation_fn = functools.partial(
inputs.augment_input_data,
data_augmentation_options=data_augmentation_options)
tensor_dict = {
fields.InputDataFields.image:
tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
fields.InputDataFields.groundtruth_boxes:
tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)),
fields.InputDataFields.groundtruth_classes:
tf.constant(np.array([1.0], np.float32)),
fields.InputDataFields.groundtruth_confidences:
tf.constant(np.array([0.8], np.float32)),
}
augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
with self.test_session() as sess:
augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
self.assertAllEqual(
augmented_tensor_dict_out[fields.InputDataFields.image].shape,
[20, 20, 3]
)
self.assertAllClose(
augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes],
[[10, 10, 20, 20]]
)
self.assertAllClose(
augmented_tensor_dict_out[fields.InputDataFields.groundtruth_classes],
[1.0]
)
self.assertAllClose(
augmented_tensor_dict_out[
fields.InputDataFields.groundtruth_confidences],
[0.8]
)
  def test_include_masks_in_data_augmentation(self):
    data_augmentation_options = [
        (preprocessor.resize_image, {
...@@ -425,7 +545,7 @@ def _fake_image_resizer_fn(image, mask):
  return (image, mask, tf.shape(image))


class DataTransformationFnTest(test_case.TestCase):

  def test_combine_additional_channels_if_present(self):
    image = np.random.rand(4, 4, 3).astype(np.float32)
...@@ -476,6 +596,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_classes],
        [[0, 0, 1], [1, 0, 0]])
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_confidences],
        [[0, 0, 1], [1, 0, 0]])

  def test_returns_correct_merged_boxes(self):
    tensor_dict = {
...@@ -504,6 +627,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_classes],
        [[1, 0, 1]])
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_confidences],
        [[1, 0, 1]])

  def test_returns_resized_masks(self):
    tensor_dict = {
...@@ -512,8 +638,11 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_instance_masks:
            tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32)),
        fields.InputDataFields.original_image_spatial_shape:
            tf.constant(np.array([4, 4], np.int32))
    }

    def fake_image_resizer_fn(image, masks=None):
      resized_image = tf.image.resize_images(image, [8, 8])
      results = [resized_image]
...@@ -538,7 +667,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.original_image].dtype, tf.uint8)
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.original_image_spatial_shape], [4, 4])
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.original_image].shape, [8, 8, 3])
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
...@@ -550,6 +681,7 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32))
    }

    def fake_model_preprocessor_fn(image):
      return (image / 255., tf.expand_dims(tf.shape(image)[1:], axis=0))
...@@ -577,6 +709,7 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32))
    }

    def add_one_data_augmentation_fn(tensor_dict):
      return {key: value + 1 for key, value in tensor_dict.items()}
...@@ -605,8 +738,10 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32))
    }

    def mul_two_model_preprocessor_fn(image):
      return (image * 2, tf.expand_dims(tf.shape(image)[1:], axis=0))

    def add_five_to_image_data_augmentation_fn(tensor_dict):
      tensor_dict[fields.InputDataFields.image] += 5
      return tensor_dict
...@@ -626,7 +761,7 @@ class DataTransformationFnTest(tf.test.TestCase):
        (np_image + 5) * 2)


class PadInputDataToStaticShapesFnTest(test_case.TestCase):

  def test_pad_images_boxes_and_classes(self):
    input_tensor_dict = {
...@@ -636,7 +771,10 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
            tf.placeholder(tf.float32, [None, 4]),
        fields.InputDataFields.groundtruth_classes:
            tf.placeholder(tf.int32, [None, 3]),
        fields.InputDataFields.true_image_shape:
            tf.placeholder(tf.int32, [3]),
        fields.InputDataFields.original_image_spatial_shape:
            tf.placeholder(tf.int32, [2])
    }
    padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
        tensor_dict=input_tensor_dict,
...@@ -650,6 +788,9 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.true_image_shape]
        .shape.as_list(), [3])
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.original_image_spatial_shape]
        .shape.as_list(), [2])
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.groundtruth_boxes]
        .shape.as_list(), [3, 4])
......
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 r"""Evaluation executable for detection models.

 This executable is used to evaluate DetectionModels. There are two ways of
@@ -54,29 +53,30 @@ from object_detection.legacy import evaluator
 from object_detection.utils import config_util
 from object_detection.utils import label_map_util

 tf.logging.set_verbosity(tf.logging.INFO)

 flags = tf.app.flags
 flags.DEFINE_boolean('eval_training_data', False,
                      'If training data should be evaluated for this job.')
-flags.DEFINE_string('checkpoint_dir', '',
-                    'Directory containing checkpoints to evaluate, typically '
-                    'set to `train_dir` used in the training job.')
-flags.DEFINE_string('eval_dir', '',
-                    'Directory to write eval summaries to.')
-flags.DEFINE_string('pipeline_config_path', '',
-                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
-                    'file. If provided, other configs are ignored')
+flags.DEFINE_string(
+    'checkpoint_dir', '',
+    'Directory containing checkpoints to evaluate, typically '
+    'set to `train_dir` used in the training job.')
+flags.DEFINE_string('eval_dir', '', 'Directory to write eval summaries to.')
+flags.DEFINE_string(
+    'pipeline_config_path', '',
+    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
+    'file. If provided, other configs are ignored')
 flags.DEFINE_string('eval_config_path', '',
                     'Path to an eval_pb2.EvalConfig config file.')
 flags.DEFINE_string('input_config_path', '',
                     'Path to an input_reader_pb2.InputReader config file.')
 flags.DEFINE_string('model_config_path', '',
                     'Path to a model_pb2.DetectionModel config file.')
-flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
-                     'evaluation. Overrides the `max_evals` parameter in the '
-                     'provided config.')
+flags.DEFINE_boolean(
+    'run_once', False, 'Option to only run a single pass of '
+    'evaluation. Overrides the `max_evals` parameter in the '
+    'provided config.')
 FLAGS = flags.FLAGS
@@ -88,9 +88,10 @@ def main(unused_argv):
   if FLAGS.pipeline_config_path:
     configs = config_util.get_configs_from_pipeline_file(
         FLAGS.pipeline_config_path)
-    tf.gfile.Copy(FLAGS.pipeline_config_path,
-                  os.path.join(FLAGS.eval_dir, 'pipeline.config'),
-                  overwrite=True)
+    tf.gfile.Copy(
+        FLAGS.pipeline_config_path,
+        os.path.join(FLAGS.eval_dir, 'pipeline.config'),
+        overwrite=True)
   else:
     configs = config_util.get_configs_from_multiple_files(
         model_config_path=FLAGS.model_config_path,
@@ -99,9 +100,7 @@ def main(unused_argv):
     for name, config in [('model.config', FLAGS.model_config_path),
                          ('eval.config', FLAGS.eval_config_path),
                          ('input.config', FLAGS.input_config_path)]:
-      tf.gfile.Copy(config,
-                    os.path.join(FLAGS.eval_dir, name),
-                    overwrite=True)
+      tf.gfile.Copy(config, os.path.join(FLAGS.eval_dir, name), overwrite=True)

   model_config = configs['model']
   eval_config = configs['eval_config']
@@ -110,9 +109,7 @@ def main(unused_argv):
     input_config = configs['train_input_config']

   model_fn = functools.partial(
-      model_builder.build,
-      model_config=model_config,
-      is_training=False)
+      model_builder.build, model_config=model_config, is_training=False)

   def get_next(config):
     return dataset_builder.make_initializable_iterator(
@@ -120,10 +117,8 @@ def main(unused_argv):
   create_input_dict_fn = functools.partial(get_next, input_config)

-  label_map = label_map_util.load_labelmap(input_config.label_map_path)
-  max_num_classes = max([item.id for item in label_map.item])
-  categories = label_map_util.convert_label_map_to_categories(
-      label_map, max_num_classes)
+  categories = label_map_util.create_categories_from_labelmap(
+      input_config.label_map_path)

   if FLAGS.run_once:
     eval_config.max_evals = 1
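A hedged sketch of the new one-call helper, assuming the usual TF Object Detection label map format; the file path here is illustrative only:

# Loads the label map, derives max_num_classes from the largest item id, and
# returns category dicts of the form {'id': 1, 'name': 'person'}.
categories = label_map_util.create_categories_from_labelmap(
    'data/label_map.pbtxt')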
...
@@ -273,6 +273,7 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
       master=eval_config.eval_master,
       save_graph=eval_config.save_graph,
       save_graph_dir=(eval_dir if eval_config.save_graph else ''),
-      losses_dict=losses_dict)
+      losses_dict=losses_dict,
+      eval_export_path=eval_config.export_path)
   return metrics
@@ -99,17 +99,19 @@ class ArgMaxMatcher(matcher.Matcher):
       if self._unmatched_threshold == self._matched_threshold:
         raise ValueError('When negatives are in between matched and '
                          'unmatched thresholds, these cannot be of equal '
-                         'value. matched: %s, unmatched: %s',
-                         self._matched_threshold, self._unmatched_threshold)
+                         'value. matched: {}, unmatched: {}'.format(
+                             self._matched_threshold,
+                             self._unmatched_threshold))
     self._force_match_for_each_row = force_match_for_each_row
     self._negatives_lower_than_unmatched = negatives_lower_than_unmatched

-  def _match(self, similarity_matrix):
+  def _match(self, similarity_matrix, valid_rows):
     """Tries to match each column of the similarity matrix to a row.

     Args:
       similarity_matrix: tensor of shape [N, M] representing any similarity
         metric.
+      valid_rows: a boolean tensor of shape [N] indicating valid rows.

     Returns:
       Match object with corresponding matches for each of M columns.
@@ -167,8 +169,10 @@ class ArgMaxMatcher(matcher.Matcher):
                                            similarity_matrix)
         force_match_column_ids = tf.argmax(similarity_matrix, 1,
                                            output_type=tf.int32)
-        force_match_column_indicators = tf.one_hot(
-            force_match_column_ids, depth=similarity_matrix_shape[1])
+        force_match_column_indicators = (
+            tf.one_hot(
+                force_match_column_ids, depth=similarity_matrix_shape[1]) *
+            tf.cast(tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32))
         force_match_row_ids = tf.argmax(force_match_column_indicators, 0,
                                         output_type=tf.int32)
         force_match_column_mask = tf.cast(
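A toy sketch (not part of this diff) of why multiplying the one-hot indicators by `valid_rows` keeps padded groundtruth rows from force-matching: an invalid row contributes an all-zero indicator row, so the per-column argmax that assigns forced matches can never land on it.

import tensorflow as tf

similarity = tf.constant([[3., 1.],
                          [5., 5.]])  # row 1 is padding
valid_rows = tf.constant([True, False])
ids = tf.argmax(similarity, 1, output_type=tf.int32)  # best column per row
indicators = (tf.one_hot(ids, depth=2) *
              tf.cast(tf.expand_dims(valid_rows, axis=-1), tf.float32))
# indicators == [[1., 0.], [0., 0.]]: the padded row is masked out.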
...
@@ -182,6 +182,34 @@ class ArgMaxMatcherTest(test_case.TestCase):
     self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
                         expected_unmatched_cols)

+  def test_return_correct_matches_using_force_match_padded_groundtruth(self):
+    def graph_fn(similarity, valid_rows):
+      matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3.,
+                                             unmatched_threshold=2.,
+                                             force_match_for_each_row=True)
+      match = matcher.match(similarity, valid_rows)
+      matched_cols = match.matched_column_indicator()
+      unmatched_cols = match.unmatched_column_indicator()
+      match_results = match.match_results
+      return (matched_cols, unmatched_cols, match_results)
+
+    similarity = np.array([[1, 1, 1, 3, 1],
+                           [-1, 0, -2, -2, -1],
+                           [0, 0, 0, 0, 0],
+                           [3, 0, -1, 2, 0],
+                           [0, 0, 0, 0, 0]], dtype=np.float32)
+    valid_rows = np.array([True, True, False, True, False])
+    expected_matched_cols = np.array([0, 1, 3])
+    expected_matched_rows = np.array([3, 1, 0])
+    expected_unmatched_cols = np.array([2, 4])  # col 2 has too high max val
+
+    (res_matched_cols, res_unmatched_cols,
+     match_results) = self.execute(graph_fn, [similarity, valid_rows])
+    self.assertAllEqual(match_results[res_matched_cols], expected_matched_rows)
+    self.assertAllEqual(np.nonzero(res_matched_cols)[0], expected_matched_cols)
+    self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
+                        expected_unmatched_cols)
+
   def test_valid_arguments_corner_case(self):
     argmax_matcher.ArgMaxMatcher(matched_threshold=1,
                                  unmatched_threshold=1)
...
@@ -35,7 +35,7 @@ class GreedyBipartiteMatcher(matcher.Matcher):
     super(GreedyBipartiteMatcher, self).__init__(
         use_matmul_gather=use_matmul_gather)

-  def _match(self, similarity_matrix, num_valid_rows=-1):
+  def _match(self, similarity_matrix, valid_rows):
     """Bipartite matches a collection rows and columns. A greedy bi-partite.

     TODO(rathodv): Add num_valid_columns options to match only that many columns
@@ -44,21 +44,27 @@ class GreedyBipartiteMatcher(matcher.Matcher):
     Args:
       similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
         where higher values mean more similar.
-      num_valid_rows: A scalar or a 1-D tensor with one element describing the
-        number of valid rows of similarity_matrix to consider for the bipartite
-        matching. If set to be negative, then all rows from similarity_matrix
-        are used.
+      valid_rows: A boolean tensor of shape [N] indicating the rows that are
+        valid.

     Returns:
       match_results: int32 tensor of shape [M] with match_results[i]=-1
         meaning that column i is not matched and otherwise that it is matched to
         row match_results[i].
     """
+    valid_row_sim_matrix = tf.gather(similarity_matrix,
+                                     tf.squeeze(tf.where(valid_rows), axis=-1))
+    invalid_row_sim_matrix = tf.gather(
+        similarity_matrix,
+        tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
+    similarity_matrix = tf.concat(
+        [valid_row_sim_matrix, invalid_row_sim_matrix], axis=0)
     # Convert similarity matrix to distance matrix as tf.image.bipartite tries
     # to find minimum distance matches.
     distance_matrix = -1 * similarity_matrix
+    num_valid_rows = tf.reduce_sum(tf.to_float(valid_rows))
     _, match_results = image_ops.bipartite_match(
-        distance_matrix, num_valid_rows)
+        distance_matrix, num_valid_rows=num_valid_rows)
     match_results = tf.reshape(match_results, [-1])
     match_results = tf.cast(match_results, tf.int32)
     return match_results
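The gather/concat above exists because `image_ops.bipartite_match` only accepts a *count* of valid rows, so the valid rows must form a contiguous prefix. A hedged sketch of the reordering on a tiny matrix; note that the returned row indices then refer to the reordered matrix, which is what the new `..._at_bottom` test below expects:

import tensorflow as tf

valid_rows = tf.constant([False, True])
sim = tf.constant([[0.15, 0.2, 0.3],
                   [0.50, 0.1, 0.8]])
top = tf.gather(sim, tf.squeeze(tf.where(valid_rows), axis=-1))
bottom = tf.gather(sim, tf.squeeze(tf.where(tf.logical_not(valid_rows)),
                                   axis=-1))
reordered = tf.concat([top, bottom], axis=0)        # valid row is now row 0
num_valid = tf.reduce_sum(tf.to_float(valid_rows))  # scalar count == 1.0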
@@ -24,44 +24,54 @@ class GreedyBipartiteMatcherTest(tf.test.TestCase):
   def test_get_expected_matches_when_all_rows_are_valid(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 2
+    valid_rows = tf.ones([2], dtype=tf.bool)
     expected_match_results = [-1, 1, 0]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows=valid_rows)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

-  def test_get_expected_matches_with_valid_rows_set_to_minus_one(self):
+  def test_get_expected_matches_with_all_rows_be_default(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = -1
     expected_match_results = [-1, 1, 0]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

   def test_get_no_matches_with_zero_valid_rows(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 0
+    valid_rows = tf.zeros([2], dtype=tf.bool)
     expected_match_results = [-1, -1, -1]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

   def test_get_expected_matches_with_only_one_valid_row(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 1
+    valid_rows = tf.constant([True, False], dtype=tf.bool)
     expected_match_results = [-1, -1, 0]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

+  def test_get_expected_matches_with_only_one_valid_row_at_bottom(self):
+    similarity_matrix = tf.constant([[0.15, 0.2, 0.3], [0.50, 0.1, 0.8]])
+    valid_rows = tf.constant([False, True], dtype=tf.bool)
+    expected_match_results = [-1, -1, 0]
+
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    match = matcher.match(similarity_matrix, valid_rows)
+    with self.test_session() as sess:
+      match_results_out = sess.run(match._match_results)
+      self.assertAllEqual(match_results_out, expected_match_results)
...
@@ -103,7 +103,6 @@ from object_detection.core import box_list_ops
 from object_detection.core import box_predictor
 from object_detection.core import losses
 from object_detection.core import model
-from object_detection.core import post_processing
 from object_detection.core import standard_fields as fields
 from object_detection.core import target_assigner
 from object_detection.utils import ops
@@ -234,11 +233,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
                first_stage_box_predictor_depth,
                first_stage_minibatch_size,
                first_stage_sampler,
-               first_stage_nms_score_threshold,
-               first_stage_nms_iou_threshold,
+               first_stage_non_max_suppression_fn,
                first_stage_max_proposals,
                first_stage_localization_loss_weight,
                first_stage_objectness_loss_weight,
+               crop_and_resize_fn,
                initial_crop_size,
                maxpool_kernel_size,
                maxpool_stride,
@@ -255,8 +254,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
                hard_example_miner=None,
                parallel_iterations=16,
                add_summaries=True,
-               use_matmul_crop_and_resize=False,
-               clip_anchors_to_image=False):
+               clip_anchors_to_image=False,
+               use_static_shapes=False,
+               resize_masks=True):
"""FasterRCNNMetaArch Constructor. """FasterRCNNMetaArch Constructor.
Args: Args:
...@@ -309,18 +309,22 @@ class FasterRCNNMetaArch(model.DetectionModel): ...@@ -309,18 +309,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
to the loss function for any given image within the image batch and is to the loss function for any given image within the image batch and is
only called "batch_size" due to terminology from the Faster R-CNN paper. only called "batch_size" due to terminology from the Faster R-CNN paper.
first_stage_sampler: Sampler to use for first stage loss (RPN loss). first_stage_sampler: Sampler to use for first stage loss (RPN loss).
first_stage_nms_score_threshold: Score threshold for non max suppression first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
for the Region Proposal Network (RPN). This value is expected to be in callable that takes `boxes`, `scores` and optional `clip_window`(with
[0, 1] as it is applied directly after a softmax transformation. The all other inputs already set) and returns a dictionary containing
recommended value for Faster R-CNN is 0. tensors with keys: `detection_boxes`, `detection_scores`,
first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold `detection_classes`, `num_detections`. This is used to perform non max
for performing Non-Max Suppression (NMS) on the boxes predicted by the suppression on the boxes predicted by the Region Proposal Network
Region Proposal Network (RPN). (RPN).
See `post_processing.batch_multiclass_non_max_suppression` for the type
and shape of these tensors.
first_stage_max_proposals: Maximum number of boxes to retain after first_stage_max_proposals: Maximum number of boxes to retain after
performing Non-Max Suppression (NMS) on the boxes predicted by the performing Non-Max Suppression (NMS) on the boxes predicted by the
Region Proposal Network (RPN). Region Proposal Network (RPN).
first_stage_localization_loss_weight: A float first_stage_localization_loss_weight: A float
first_stage_objectness_loss_weight: A float first_stage_objectness_loss_weight: A float
crop_and_resize_fn: A differentiable resampler to use for cropping RPN
proposal features.
initial_crop_size: A single integer indicating the output size initial_crop_size: A single integer indicating the output size
(width and height are set to be the same) of the initial bilinear (width and height are set to be the same) of the initial bilinear
interpolation based cropping during ROI pooling. interpolation based cropping during ROI pooling.
...@@ -367,12 +371,13 @@ class FasterRCNNMetaArch(model.DetectionModel): ...@@ -367,12 +371,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
in parallel for calls to tf.map_fn. in parallel for calls to tf.map_fn.
add_summaries: boolean (default: True) controlling whether summary ops add_summaries: boolean (default: True) controlling whether summary ops
should be added to tensorflow graph. should be added to tensorflow graph.
use_matmul_crop_and_resize: Force the use of matrix multiplication based
crop and resize instead of standard tf.image.crop_and_resize while
computing second stage input feature maps.
clip_anchors_to_image: Normally, anchors generated for a given image size clip_anchors_to_image: Normally, anchors generated for a given image size
are pruned during training if they lie outside the image window. This are pruned during training if they lie outside the image window. This
option clips the anchors to be within the image instead of pruning. option clips the anchors to be within the image instead of pruning.
use_static_shapes: If True, uses implementation of ops with static shape
guarantees.
resize_masks: Indicates whether the masks presend in the groundtruth
should be resized in the model with `image_resizer_fn`
Raises: Raises:
ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
@@ -384,9 +389,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
     # in the future.
     super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)

-    if is_training and second_stage_batch_size > first_stage_max_proposals:
-      raise ValueError('second_stage_batch_size should be no greater than '
-                       'first_stage_max_proposals.')
     if not isinstance(first_stage_anchor_generator,
                       grid_anchor_generator.GridAnchorGenerator):
       raise ValueError('first_stage_anchor_generator must be of type '
@@ -394,6 +396,7 @@ class FasterRCNNMetaArch(model.DetectionModel):

     self._is_training = is_training
     self._image_resizer_fn = image_resizer_fn
+    self._resize_masks = resize_masks
     self._feature_extractor = feature_extractor
     self._number_of_stages = number_of_stages
@@ -425,9 +428,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
             min_depth=0,
             max_depth=0))

-    self._first_stage_nms_score_threshold = first_stage_nms_score_threshold
-    self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold
+    self._first_stage_nms_fn = first_stage_non_max_suppression_fn
     self._first_stage_max_proposals = first_stage_max_proposals
+    self._use_static_shapes = use_static_shapes

     self._first_stage_localization_loss = (
         losses.WeightedSmoothL1LocalizationLoss())
@@ -437,6 +440,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight

     # Per-region cropping parameters
+    self._crop_and_resize_fn = crop_and_resize_fn
     self._initial_crop_size = initial_crop_size
     self._maxpool_kernel_size = maxpool_kernel_size
     self._maxpool_stride = maxpool_stride
@@ -458,7 +462,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
     self._second_stage_cls_loss_weight = second_stage_classification_loss_weight
     self._second_stage_mask_loss_weight = (
         second_stage_mask_prediction_loss_weight)
-    self._use_matmul_crop_and_resize = use_matmul_crop_and_resize
     self._hard_example_miner = hard_example_miner
     self._parallel_iterations = parallel_iterations
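For context, a hedged sketch of how an injectable `first_stage_non_max_suppression_fn` can be produced. In the real pipeline this partial is built from the config by the builders; the threshold values below are illustrative only:

import functools
from object_detection.core import post_processing

# Pre-binds everything except boxes, scores and clip_window, which the
# meta-architecture supplies at call time.
first_stage_nms_fn = functools.partial(
    post_processing.batch_multiclass_non_max_suppression,
    score_thresh=0.0,        # illustrative; normally from the pipeline config
    iou_thresh=0.7,          # illustrative
    max_size_per_class=300,  # illustrative
    max_total_size=300)      # illustrative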
@@ -673,9 +676,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
     }

     if self._number_of_stages >= 2:
+      # If mixed-precision training on TPU is enabled, rpn_box_encodings and
+      # rpn_objectness_predictions_with_background are bfloat16 tensors.
+      # As prediction results, they need to be cast to float32 tensors for
+      # correct postprocess_rpn computation in predict_second_stage.
       prediction_dict.update(self._predict_second_stage(
-          rpn_box_encodings,
-          rpn_objectness_predictions_with_background,
+          tf.to_float(rpn_box_encodings),
+          tf.to_float(rpn_objectness_predictions_with_background),
           rpn_features_to_crop,
           self._anchors.get(), image_shape, true_image_shapes))
@@ -719,7 +726,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
         [batch_size, num_valid_anchors, 2] containing class
         predictions (logits) for each of the anchors. Note that this
         tensor *includes* background class predictions (at class index 0).
-      rpn_features_to_crop: A 4-D float32 tensor with shape
+      rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
         [batch_size, height, width, depth] representing image features to crop
         using the proposal boxes predicted by the RPN.
       anchors: 2-D float tensor of shape
@@ -758,17 +765,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
           boxes proposed by the RPN, thus enabling one to extract features and
           get box classification and prediction for externally selected areas
           of the image.
-        6) box_classifier_features: a 4-D float32 tensor representing the
-           features for each proposal.
+        6) box_classifier_features: a 4-D float32 or bfloat16 tensor
+           representing the features for each proposal.
     """
     image_shape_2d = self._image_batch_shape_2d(image_shape)
     proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
         rpn_box_encodings, rpn_objectness_predictions_with_background,
         anchors, image_shape_2d, true_image_shapes)

+    # If mixed-precision training on TPU is enabled, the dtype of
+    # rpn_features_to_crop is bfloat16, otherwise it is float32. tf.cast is
+    # used to match the dtype of proposal_boxes_normalized to that of
+    # rpn_features_to_crop for further computation.
     flattened_proposal_feature_maps = (
         self._compute_second_stage_input_feature_maps(
-            rpn_features_to_crop, proposal_boxes_normalized))
+            rpn_features_to_crop,
+            tf.cast(proposal_boxes_normalized, rpn_features_to_crop.dtype)))

     box_classifier_features = (
         self._feature_extractor.extract_box_classifier_features(
@@ -956,8 +968,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
       image_shape: A 1-D tensor representing the input image shape.
     """
     image_shape = tf.shape(preprocessed_inputs)

-    rpn_features_to_crop, _ = self._feature_extractor.extract_proposal_features(
-        preprocessed_inputs, scope=self.first_stage_feature_extractor_scope)
+    rpn_features_to_crop, self.endpoints = (
+        self._feature_extractor.extract_proposal_features(
+            preprocessed_inputs,
+            scope=self.first_stage_feature_extractor_scope))

     feature_map_shape = tf.shape(rpn_features_to_crop)
     anchors = box_list_ops.concatenate(
@@ -965,12 +980,15 @@ class FasterRCNNMetaArch(model.DetectionModel):
                             feature_map_shape[2])]))
     with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()):
       kernel_size = self._first_stage_box_predictor_kernel_size
+      reuse = tf.get_variable_scope().reuse
       rpn_box_predictor_features = slim.conv2d(
           rpn_features_to_crop,
           self._first_stage_box_predictor_depth,
           kernel_size=[kernel_size, kernel_size],
           rate=self._first_stage_atrous_rate,
-          activation_fn=tf.nn.relu6)
+          activation_fn=tf.nn.relu6,
+          scope='Conv',
+          reuse=reuse)
     return (rpn_box_predictor_features, rpn_features_to_crop,
             anchors, image_shape)
@@ -1223,14 +1241,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
         rpn_objectness_predictions_with_background_batch)[:, :, 1]
     clip_window = self._compute_clip_window(image_shapes)
     (proposal_boxes, proposal_scores, _, _, _,
-     num_proposals) = post_processing.batch_multiclass_non_max_suppression(
+     num_proposals) = self._first_stage_nms_fn(
          tf.expand_dims(proposal_boxes, axis=2),
-         tf.expand_dims(rpn_objectness_softmax_without_background,
-                        axis=2),
-         self._first_stage_nms_score_threshold,
-         self._first_stage_nms_iou_threshold,
-         self._first_stage_max_proposals,
-         self._first_stage_max_proposals,
+         tf.expand_dims(rpn_objectness_softmax_without_background, axis=2),
          clip_window=clip_window)
     if self._is_training:
       proposal_boxes = tf.stop_gradient(proposal_boxes)
@@ -1377,16 +1390,19 @@ class FasterRCNNMetaArch(model.DetectionModel):
     groundtruth_masks_list = self._groundtruth_lists.get(
         fields.BoxListFields.masks)
-    if groundtruth_masks_list is not None:
+    # TODO(rathodv): Remove mask resizing once the legacy pipeline is deleted.
+    if groundtruth_masks_list is not None and self._resize_masks:
       resized_masks_list = []
       for mask in groundtruth_masks_list:
         _, resized_mask, _ = self._image_resizer_fn(
             # Reuse the given `image_resizer_fn` to resize groundtruth masks.
             # `mask` tensor for an image is of the shape [num_masks,
             # image_height, image_width]. Below we create a dummy image of
             # the shape [image_height, image_width, 1] to use with
             # `image_resizer_fn`.
-            image=tf.zeros(tf.stack([tf.shape(mask)[1], tf.shape(mask)[2], 1])),
+            image=tf.zeros(tf.stack([tf.shape(mask)[1],
                                     tf.shape(mask)[2], 1])),
             masks=mask)
         resized_masks_list.append(resized_mask)
@@ -1443,11 +1459,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
         tf.range(proposal_boxlist.num_boxes()) < num_valid_proposals,
         cls_weights > 0
     )
-    sampled_indices = self._second_stage_sampler.subsample(
+    selected_positions = self._second_stage_sampler.subsample(
         valid_indicator,
         self._second_stage_batch_size,
         positive_indicator)
-    return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices)
+    return box_list_ops.boolean_mask(
+        proposal_boxlist,
+        selected_positions,
+        use_static_shapes=self._use_static_shapes,
+        indicator_sum=(self._second_stage_batch_size
+                       if self._use_static_shapes else None))

   def _compute_second_stage_input_feature_maps(self, features_to_crop,
                                                proposal_boxes_normalized):
@@ -1467,35 +1488,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
-    def get_box_inds(proposals):
-      proposals_shape = proposals.get_shape().as_list()
-      if any(dim is None for dim in proposals_shape):
-        proposals_shape = tf.shape(proposals)
-      ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
-      multiplier = tf.expand_dims(
-          tf.range(start=0, limit=proposals_shape[0]), 1)
-      return tf.reshape(ones_mat * multiplier, [-1])
-
-    if self._use_matmul_crop_and_resize:
-      def _single_image_crop_and_resize(inputs):
-        single_image_features_to_crop, proposal_boxes_normalized = inputs
-        return ops.matmul_crop_and_resize(
-            tf.expand_dims(single_image_features_to_crop, 0),
-            proposal_boxes_normalized,
-            [self._initial_crop_size, self._initial_crop_size])
-
-      cropped_regions = self._flatten_first_two_dimensions(
-          shape_utils.static_or_dynamic_map_fn(
-              _single_image_crop_and_resize,
-              elems=[features_to_crop, proposal_boxes_normalized],
-              dtype=tf.float32,
-              parallel_iterations=self._parallel_iterations))
-    else:
-      cropped_regions = tf.image.crop_and_resize(
-          features_to_crop,
-          self._flatten_first_two_dimensions(proposal_boxes_normalized),
-          get_box_inds(proposal_boxes_normalized),
-          (self._initial_crop_size, self._initial_crop_size))
+    cropped_regions = self._flatten_first_two_dimensions(
+        self._crop_and_resize_fn(
+            features_to_crop, proposal_boxes_normalized,
+            [self._initial_crop_size, self._initial_crop_size]))
     return slim.max_pool2d(
         cropped_regions,
         [self._maxpool_kernel_size, self._maxpool_kernel_size],
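The injected `crop_and_resize_fn` is expected to take features of shape [batch, H, W, C] and boxes of shape [batch, num_boxes, 4] and return crops of shape [batch, num_boxes, size, size, C], which the caller then flattens. A hedged sketch of a tf.image-based callable satisfying that contract; this is an illustrative equivalent, not the library's exact implementation:

import tensorflow as tf

def native_crop_and_resize(features, boxes, crop_size):
  """Crops [batch, num_boxes, 4] boxes from [batch, H, W, C] features."""
  batch = tf.shape(boxes)[0]
  num_boxes = tf.shape(boxes)[1]
  # Map every flattened box back to the batch element it came from.
  box_inds = tf.reshape(
      tf.tile(tf.expand_dims(tf.range(batch), 1), [1, num_boxes]), [-1])
  crops = tf.image.crop_and_resize(
      features, tf.reshape(boxes, [-1, 4]), box_inds, crop_size)
  # Restore the [batch, num_boxes, ...] leading dimensions.
  return tf.reshape(
      crops, tf.concat([tf.shape(boxes)[:2], tf.shape(crops)[1:]], axis=0))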
@@ -1738,11 +1734,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
     sampled_reg_indices = tf.multiply(batch_sampled_indices,
                                       batch_reg_weights)

+    losses_mask = None
+    if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+      losses_mask = tf.stack(self.groundtruth_lists(
+          fields.InputDataFields.is_annotated))
     localization_losses = self._first_stage_localization_loss(
-        rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices)
+        rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices,
+        losses_mask=losses_mask)
     objectness_losses = self._first_stage_objectness_loss(
         rpn_objectness_predictions_with_background,
-        batch_one_hot_targets, weights=batch_sampled_indices)
+        batch_one_hot_targets, weights=batch_sampled_indices,
+        losses_mask=losses_mask)
     localization_loss = tf.reduce_mean(
         tf.reduce_sum(localization_losses, axis=1) / normalizer)
     objectness_loss = tf.reduce_mean(
@@ -1866,32 +1868,32 @@ class FasterRCNNMetaArch(model.DetectionModel):
       # for just one class to avoid over-counting for regression loss and
       # (optionally) mask loss.
       else:
-        # We only predict refined location encodings for the non background
-        # classes, but we now pad it to make it compatible with the class
-        # predictions
-        refined_box_encodings_with_background = tf.pad(
-            refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
-        refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
-            refined_box_encodings_with_background,
-            tf.greater(one_hot_flat_cls_targets_with_background, 0))
-        reshaped_refined_box_encodings = tf.reshape(
-            refined_box_encodings_masked_by_class_targets,
-            [batch_size, self.max_num_proposals, self._box_coder.code_size])
+        reshaped_refined_box_encodings = (
+            self._get_refined_encodings_for_postitive_class(
+                refined_box_encodings,
+                one_hot_flat_cls_targets_with_background, batch_size))

+      losses_mask = None
+      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+        losses_mask = tf.stack(self.groundtruth_lists(
+            fields.InputDataFields.is_annotated))
       second_stage_loc_losses = self._second_stage_localization_loss(
           reshaped_refined_box_encodings,
-          batch_reg_targets, weights=batch_reg_weights) / normalizer
+          batch_reg_targets,
+          weights=batch_reg_weights,
+          losses_mask=losses_mask) / normalizer
       second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
           self._second_stage_classification_loss(
               class_predictions_with_background,
               batch_cls_targets_with_background,
-              weights=batch_cls_weights),
+              weights=batch_cls_weights,
+              losses_mask=losses_mask),
           ndims=2) / normalizer
       second_stage_loc_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_loc_losses, paddings_indicator))
+          second_stage_loc_losses * tf.to_float(paddings_indicator))
       second_stage_cls_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_cls_losses, paddings_indicator))
+          second_stage_cls_losses * tf.to_float(paddings_indicator))

       if self._hard_example_miner:
         (second_stage_loc_loss, second_stage_cls_loss
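A toy sketch (not from this diff) of why the multiply-by-indicator rewrite preserves the loss value: summing a boolean_mask selection equals summing the losses multiplied by a 0/1 indicator, but the latter keeps the tensor shape static, which static-shape (TPU) compilation needs.

import tensorflow as tf

losses = tf.constant([[1., 2.], [3., 4.]])
indicator = tf.constant([[True, False], [True, True]])
masked = tf.reduce_sum(tf.boolean_mask(losses, indicator))  # == 8.0
static = tf.reduce_sum(losses * tf.to_float(indicator))     # == 8.0, static shape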
@@ -1954,10 +1956,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
         box_list.BoxList(tf.reshape(proposal_boxes, [-1, 4])),
         image_shape[1], image_shape[2]).get()

-    flat_cropped_gt_mask = tf.image.crop_and_resize(
+    flat_cropped_gt_mask = self._crop_and_resize_fn(
         tf.expand_dims(flat_gt_masks, -1),
-        flat_normalized_proposals,
-        tf.range(flat_normalized_proposals.shape[0].value),
+        tf.expand_dims(flat_normalized_proposals, axis=1),
         [mask_height, mask_width])

     batch_cropped_gt_mask = tf.reshape(
@@ -1968,14 +1969,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
           self._second_stage_mask_loss(
               reshaped_prediction_masks,
               batch_cropped_gt_mask,
-              weights=batch_mask_target_weights),
+              weights=batch_mask_target_weights,
+              losses_mask=losses_mask),
           ndims=2) / (
               mask_height * mask_width * tf.maximum(
                   tf.reduce_sum(
                       batch_mask_target_weights, axis=1, keep_dims=True
                   ), tf.ones((batch_size, 1))))
-      second_stage_mask_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_mask_losses, paddings_indicator))
+      second_stage_mask_loss = tf.reduce_sum(
+          tf.where(paddings_indicator, second_stage_mask_losses,
+                   tf.zeros_like(second_stage_mask_losses)))

     if second_stage_mask_loss is not None:
       mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
+  def _get_refined_encodings_for_postitive_class(
+      self, refined_box_encodings, flat_cls_targets_with_background,
+      batch_size):
+    # We only predict refined location encodings for the non background
+    # classes, but we now pad it to make it compatible with the class
+    # predictions
+    refined_box_encodings_with_background = tf.pad(refined_box_encodings,
+                                                   [[0, 0], [1, 0], [0, 0]])
+    refined_box_encodings_masked_by_class_targets = (
+        box_list_ops.boolean_mask(
+            box_list.BoxList(
+                tf.reshape(refined_box_encodings_with_background,
+                           [-1, self._box_coder.code_size])),
+            tf.reshape(tf.greater(flat_cls_targets_with_background, 0), [-1]),
+            use_static_shapes=self._use_static_shapes,
+            indicator_sum=batch_size * self.max_num_proposals
+            if self._use_static_shapes else None).get())
+    return tf.reshape(
+        refined_box_encodings_masked_by_class_targets, [
+            batch_size, self.max_num_proposals,
+            self._box_coder.code_size
+        ])
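A toy numeric sketch of the pad-then-mask step in the method above: encodings exist only for the non-background classes, so padding inserts a background slot at index 0, letting the one-hot class targets (which include background) select exactly one encoding per proposal.

import tensorflow as tf

refined = tf.constant([[[1., 1., 1., 1.],
                        [2., 2., 2., 2.]]])         # [1 proposal, 2 classes, 4]
padded = tf.pad(refined, [[0, 0], [1, 0], [0, 0]])  # background slot at index 0
one_hot = tf.constant([[0, 0, 1]])                  # proposal targets class 2
selected = tf.boolean_mask(padded, tf.greater(one_hot, 0))
# selected == [[2., 2., 2., 2.]]: one encoding per proposal, chosen by class.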

   def _padded_batched_proposals_indicator(self,
                                           num_proposals,
                                           max_num_proposals):
...