Unverified Commit 3f78f4cf authored by derekjchow, committed by GitHub
Browse files

Merge pull request #3494 from pkulzc/master

Update object detection with internal changes and remove unused BUILD files.
parents 73748d01 0319908c
...@@ -187,7 +187,7 @@ class OptimizerBuilderTest(tf.test.TestCase): ...@@ -187,7 +187,7 @@ class OptimizerBuilderTest(tf.test.TestCase):
optimizer, _ = optimizer_builder.build(optimizer_proto) optimizer, _ = optimizer_builder.build(optimizer_proto)
self.assertTrue( self.assertTrue(
isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer)) isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
# TODO: Find a way to not depend on the private members. # TODO(rathodv): Find a way to not depend on the private members.
self.assertAlmostEqual(optimizer._ema._decay, 0.2) self.assertAlmostEqual(optimizer._ema._decay, 0.2)
def testBuildEmptyOptimizer(self): def testBuildEmptyOptimizer(self):
......
...@@ -256,12 +256,10 @@ def build(preprocessor_step_config): ...@@ -256,12 +256,10 @@ def build(preprocessor_step_config):
area_range = [(op.min_area, op.max_area) for op in config.operations] area_range = [(op.min_area, op.max_area) for op in config.operations]
overlap_thresh = [op.overlap_thresh for op in config.operations] overlap_thresh = [op.overlap_thresh for op in config.operations]
random_coef = [op.random_coef for op in config.operations] random_coef = [op.random_coef for op in config.operations]
min_padded_size_ratio = [ min_padded_size_ratio = [tuple(op.min_padded_size_ratio)
(op.min_padded_size_ratio[0], op.min_padded_size_ratio[1]) for op in config.operations]
for op in config.operations] max_padded_size_ratio = [tuple(op.max_padded_size_ratio)
max_padded_size_ratio = [ for op in config.operations]
(op.max_padded_size_ratio[0], op.max_padded_size_ratio[1])
for op in config.operations]
pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b) pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b)
for op in config.operations] for op in config.operations]
return (preprocessor.ssd_random_crop_pad, return (preprocessor.ssd_random_crop_pad,
...@@ -296,30 +294,29 @@ def build(preprocessor_step_config): ...@@ -296,30 +294,29 @@ def build(preprocessor_step_config):
if step_type == 'ssd_random_crop_pad_fixed_aspect_ratio': if step_type == 'ssd_random_crop_pad_fixed_aspect_ratio':
config = preprocessor_step_config.ssd_random_crop_pad_fixed_aspect_ratio config = preprocessor_step_config.ssd_random_crop_pad_fixed_aspect_ratio
kwargs = {}
aspect_ratio = config.aspect_ratio
if aspect_ratio:
kwargs['aspect_ratio'] = aspect_ratio
min_padded_size_ratio = config.min_padded_size_ratio
if min_padded_size_ratio:
if len(min_padded_size_ratio) != 2:
raise ValueError('min_padded_size_ratio should have 2 elements if set!')
kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio)
max_padded_size_ratio = config.max_padded_size_ratio
if max_padded_size_ratio:
if len(max_padded_size_ratio) != 2:
raise ValueError('max_padded_size_ratio should have 2 elements if set!')
kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio)
if config.operations: if config.operations:
min_object_covered = [op.min_object_covered for op in config.operations] kwargs['min_object_covered'] = [op.min_object_covered
aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio) for op in config.operations]
for op in config.operations] kwargs['aspect_ratio_range'] = [(op.min_aspect_ratio, op.max_aspect_ratio)
area_range = [(op.min_area, op.max_area) for op in config.operations] for op in config.operations]
overlap_thresh = [op.overlap_thresh for op in config.operations] kwargs['area_range'] = [(op.min_area, op.max_area)
random_coef = [op.random_coef for op in config.operations] for op in config.operations]
min_padded_size_ratio = [ kwargs['overlap_thresh'] = [op.overlap_thresh for op in config.operations]
(op.min_padded_size_ratio[0], op.min_padded_size_ratio[1]) kwargs['random_coef'] = [op.random_coef for op in config.operations]
for op in config.operations] return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio, kwargs)
max_padded_size_ratio = [
(op.max_padded_size_ratio[0], op.max_padded_size_ratio[1])
for op in config.operations]
return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio,
{
'min_object_covered': min_object_covered,
'aspect_ratio': config.aspect_ratio,
'aspect_ratio_range': aspect_ratio_range,
'area_range': area_range,
'overlap_thresh': overlap_thresh,
'random_coef': random_coef,
'min_padded_size_ratio': min_padded_size_ratio,
'max_padded_size_ratio': max_padded_size_ratio,
})
return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio, {})
raise ValueError('Unknown preprocessing step.') raise ValueError('Unknown preprocessing step.')
...@@ -532,8 +532,6 @@ class PreprocessorBuilderTest(tf.test.TestCase): ...@@ -532,8 +532,6 @@ class PreprocessorBuilderTest(tf.test.TestCase):
max_area: 1.0 max_area: 1.0
overlap_thresh: 0.0 overlap_thresh: 0.0
random_coef: 0.375 random_coef: 0.375
min_padded_size_ratio: [1.0, 1.0]
max_padded_size_ratio: [2.0, 2.0]
} }
operations { operations {
min_object_covered: 0.25 min_object_covered: 0.25
...@@ -543,10 +541,10 @@ class PreprocessorBuilderTest(tf.test.TestCase): ...@@ -543,10 +541,10 @@ class PreprocessorBuilderTest(tf.test.TestCase):
max_area: 1.0 max_area: 1.0
overlap_thresh: 0.25 overlap_thresh: 0.25
random_coef: 0.375 random_coef: 0.375
min_padded_size_ratio: [1.0, 1.0]
max_padded_size_ratio: [2.0, 2.0]
} }
aspect_ratio: 0.875 aspect_ratio: 0.875
min_padded_size_ratio: [1.0, 1.0]
max_padded_size_ratio: [2.0, 2.0]
} }
""" """
preprocessor_proto = preprocessor_pb2.PreprocessingStep() preprocessor_proto = preprocessor_pb2.PreprocessingStep()
...@@ -560,8 +558,8 @@ class PreprocessorBuilderTest(tf.test.TestCase): ...@@ -560,8 +558,8 @@ class PreprocessorBuilderTest(tf.test.TestCase):
'area_range': [(0.5, 1.0), (0.5, 1.0)], 'area_range': [(0.5, 1.0), (0.5, 1.0)],
'overlap_thresh': [0.0, 0.25], 'overlap_thresh': [0.0, 0.25],
'random_coef': [0.375, 0.375], 'random_coef': [0.375, 0.375],
'min_padded_size_ratio': [(1.0, 1.0), (1.0, 1.0)], 'min_padded_size_ratio': (1.0, 1.0),
'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)]}) 'max_padded_size_ratio': (2.0, 2.0)})
if __name__ == '__main__': if __name__ == '__main__':
......
# Tensorflow Object Detection API: Core.
#
# Bazel BUILD file for the object_detection core package. It declares one
# py_library target per core module and a py_test target for each module
# that has a test file. Labels beginning with ":" refer to targets in this
# package; labels under //tensorflow/models/research/... refer to sibling
# packages in the same source tree.
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])
# Apache 2.0

# Input batching (batcher.py); layers on the prefetcher and preprocessor
# libraries declared further down in this file.
py_library(
    name = "batcher",
    srcs = ["batcher.py"],
    deps = [
        ":prefetcher",
        ":preprocessor",
        ":standard_fields",
        "//tensorflow",
    ],
)

# Unit tests for :batcher.
py_test(
    name = "batcher_test",
    srcs = ["batcher_test.py"],
    deps = [
        ":batcher",
        "//tensorflow",
    ],
)

# Bounding-box container type (box_list.py).
py_library(
    name = "box_list",
    srcs = [
        "box_list.py",
    ],
    deps = [
        "//tensorflow",
    ],
)

# Unit tests for :box_list.
py_test(
    name = "box_list_test",
    srcs = ["box_list_test.py"],
    deps = [
        ":box_list",
    ],
)

# Operations over BoxList objects (box_list_ops.py).
py_library(
    name = "box_list_ops",
    srcs = [
        "box_list_ops.py",
    ],
    deps = [
        ":box_list",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:shape_utils",
    ],
)

# Unit tests for :box_list_ops.
py_test(
    name = "box_list_ops_test",
    srcs = ["box_list_ops_test.py"],
    deps = [
        ":box_list",
        ":box_list_ops",
    ],
)

# Box encoder/decoder interface (box_coder.py).
py_library(
    name = "box_coder",
    srcs = [
        "box_coder.py",
    ],
    deps = [
        "//tensorflow",
    ],
)

# Unit tests for :box_coder.
py_test(
    name = "box_coder_test",
    srcs = [
        "box_coder_test.py",
    ],
    deps = [
        ":box_coder",
        ":box_list",
        "//tensorflow",
    ],
)

# Keypoint manipulation ops (keypoint_ops.py).
py_library(
    name = "keypoint_ops",
    srcs = [
        "keypoint_ops.py",
    ],
    deps = [
        "//tensorflow",
    ],
)

# Unit tests for :keypoint_ops.
py_test(
    name = "keypoint_ops_test",
    srcs = ["keypoint_ops_test.py"],
    deps = [
        ":keypoint_ops",
    ],
)

# Loss functions (losses.py).
py_library(
    name = "losses",
    srcs = ["losses.py"],
    deps = [
        ":box_list",
        ":box_list_ops",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:ops",
    ],
)

# Matcher interface (matcher.py).
py_library(
    name = "matcher",
    srcs = [
        "matcher.py",
    ],
    deps = [
        "//tensorflow/models/research/object_detection/utils:ops",
    ],
)

# Detection model interface (model.py).
py_library(
    name = "model",
    srcs = ["model.py"],
    deps = [
        ":standard_fields",
    ],
)

# Unit tests for :matcher.
py_test(
    name = "matcher_test",
    srcs = [
        "matcher_test.py",
    ],
    deps = [
        ":matcher",
        "//tensorflow",
    ],
)

# Tensor prefetching utility (prefetcher.py).
py_library(
    name = "prefetcher",
    srcs = ["prefetcher.py"],
    deps = ["//tensorflow"],
)

# Input preprocessing / data augmentation (preprocessor.py).
py_library(
    name = "preprocessor",
    srcs = [
        "preprocessor.py",
    ],
    deps = [
        ":box_list",
        ":box_list_ops",
        ":keypoint_ops",
        ":preprocessor_cache",
        ":standard_fields",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:shape_utils",
    ],
)

# Cache used by the preprocessor (preprocessor_cache.py); no deps.
py_library(
    name = "preprocessor_cache",
    srcs = [
        "preprocessor_cache.py",
    ],
)

# Unit tests for :preprocessor.
py_test(
    name = "preprocessor_test",
    srcs = [
        "preprocessor_test.py",
    ],
    deps = [
        ":preprocessor",
        ":preprocessor_cache",
        "//tensorflow",
    ],
)

# Unit tests for :losses.
py_test(
    name = "losses_test",
    srcs = ["losses_test.py"],
    deps = [
        ":box_list",
        ":losses",
        ":matcher",
        "//tensorflow",
    ],
)

# Unit tests for :prefetcher.
py_test(
    name = "prefetcher_test",
    srcs = ["prefetcher_test.py"],
    deps = [
        ":prefetcher",
        "//tensorflow",
    ],
)

# Shared field-name constants (standard_fields.py); no deps.
py_library(
    name = "standard_fields",
    srcs = [
        "standard_fields.py",
    ],
)

# Post-processing (post_processing.py), e.g. operations applied to raw
# detector outputs.
py_library(
    name = "post_processing",
    srcs = ["post_processing.py"],
    deps = [
        ":box_list",
        ":box_list_ops",
        ":standard_fields",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:shape_utils",
    ],
)

# Unit tests for :post_processing.
py_test(
    name = "post_processing_test",
    srcs = ["post_processing_test.py"],
    deps = [
        ":box_list",
        ":box_list_ops",
        ":post_processing",
        "//tensorflow",
    ],
)

# Target assignment (target_assigner.py); pulls in box coders and matchers
# from sibling packages.
py_library(
    name = "target_assigner",
    srcs = [
        "target_assigner.py",
    ],
    deps = [
        ":box_list",
        ":matcher",
        ":region_similarity_calculator",
        ":standard_fields",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/box_coders:faster_rcnn_box_coder",
        "//tensorflow/models/research/object_detection/box_coders:mean_stddev_box_coder",
        "//tensorflow/models/research/object_detection/core:box_coder",
        "//tensorflow/models/research/object_detection/matchers:argmax_matcher",
        "//tensorflow/models/research/object_detection/matchers:bipartite_matcher",
        "//tensorflow/models/research/object_detection/utils:shape_utils",
    ],
)

# Unit tests for :target_assigner. Configured with a larger size/timeout
# than the other tests in this package.
py_test(
    name = "target_assigner_test",
    size = "large",
    timeout = "long",
    srcs = ["target_assigner_test.py"],
    deps = [
        ":box_list",
        ":region_similarity_calculator",
        ":target_assigner",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/box_coders:keypoint_box_coder",
        "//tensorflow/models/research/object_detection/box_coders:mean_stddev_box_coder",
        "//tensorflow/models/research/object_detection/matchers:bipartite_matcher",
        "//tensorflow/models/research/object_detection/utils:test_case",
    ],
)

# Data decoder interface (data_decoder.py); no deps.
py_library(
    name = "data_decoder",
    srcs = ["data_decoder.py"],
)

# Data parser interface (data_parser.py); no deps.
py_library(
    name = "data_parser",
    srcs = ["data_parser.py"],
)

# Box predictor interface and implementations (box_predictor.py).
py_library(
    name = "box_predictor",
    srcs = ["box_predictor.py"],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:ops",
        "//tensorflow/models/research/object_detection/utils:shape_utils",
        "//tensorflow/models/research/object_detection/utils:static_shape",
    ],
)

# Unit tests for :box_predictor.
py_test(
    name = "box_predictor_test",
    srcs = ["box_predictor_test.py"],
    deps = [
        ":box_predictor",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/builders:hyperparams_builder",
        "//tensorflow/models/research/object_detection/protos:hyperparams_py_pb2",
        "//tensorflow/models/research/object_detection/utils:test_case",
    ],
)

# Region similarity measures (region_similarity_calculator.py). Note: its
# box_list_ops dependency is spelled with the fully-qualified package label
# rather than ":box_list_ops".
py_library(
    name = "region_similarity_calculator",
    srcs = [
        "region_similarity_calculator.py",
    ],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/core:box_list_ops",
    ],
)

# Unit tests for :region_similarity_calculator.
py_test(
    name = "region_similarity_calculator_test",
    srcs = [
        "region_similarity_calculator_test.py",
    ],
    deps = [
        ":region_similarity_calculator",
        "//tensorflow/models/research/object_detection/core:box_list",
    ],
)

# Anchor generator interface (anchor_generator.py).
py_library(
    name = "anchor_generator",
    srcs = [
        "anchor_generator.py",
    ],
    deps = [
        "//tensorflow",
    ],
)

# Minibatch sampler interface (minibatch_sampler.py).
py_library(
    name = "minibatch_sampler",
    srcs = [
        "minibatch_sampler.py",
    ],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:ops",
    ],
)

# Unit tests for :minibatch_sampler.
py_test(
    name = "minibatch_sampler_test",
    srcs = [
        "minibatch_sampler_test.py",
    ],
    deps = [
        ":minibatch_sampler",
        "//tensorflow",
    ],
)

# Balanced positive/negative example sampler, built on :minibatch_sampler.
py_library(
    name = "balanced_positive_negative_sampler",
    srcs = [
        "balanced_positive_negative_sampler.py",
    ],
    deps = [
        ":minibatch_sampler",
        "//tensorflow",
    ],
)

# Unit tests for :balanced_positive_negative_sampler.
py_test(
    name = "balanced_positive_negative_sampler_test",
    srcs = [
        "balanced_positive_negative_sampler_test.py",
    ],
    deps = [
        ":balanced_positive_negative_sampler",
        "//tensorflow",
    ],
)
...@@ -77,7 +77,7 @@ class AnchorGenerator(object): ...@@ -77,7 +77,7 @@ class AnchorGenerator(object):
def generate(self, feature_map_shape_list, **params): def generate(self, feature_map_shape_list, **params):
"""Generates a collection of bounding boxes to be used as anchors. """Generates a collection of bounding boxes to be used as anchors.
TODO: remove **params from argument list and make stride and TODO(rathodv): remove **params from argument list and make stride and
offsets (for multiple_grid_anchor_generator) constructor arguments. offsets (for multiple_grid_anchor_generator) constructor arguments.
Args: Args:
...@@ -88,7 +88,9 @@ class AnchorGenerator(object): ...@@ -88,7 +88,9 @@ class AnchorGenerator(object):
**params: parameters for anchor generation op **params: parameters for anchor generation op
Returns: Returns:
boxes: a BoxList holding a collection of N anchor boxes boxes_list: a list of BoxLists each holding anchor boxes corresponding to
the input feature map shapes.
Raises: Raises:
ValueError: if the number of feature map shapes does not match the length ValueError: if the number of feature map shapes does not match the length
of NumAnchorsPerLocation. of NumAnchorsPerLocation.
...@@ -98,13 +100,14 @@ class AnchorGenerator(object): ...@@ -98,13 +100,14 @@ class AnchorGenerator(object):
raise ValueError('Number of feature maps is expected to equal the length ' raise ValueError('Number of feature maps is expected to equal the length '
'of `num_anchors_per_location`.') 'of `num_anchors_per_location`.')
with tf.name_scope(self.name_scope()): with tf.name_scope(self.name_scope()):
anchors = self._generate(feature_map_shape_list, **params) anchors_list = self._generate(feature_map_shape_list, **params)
if self.check_num_anchors: if self.check_num_anchors:
with tf.control_dependencies([ with tf.control_dependencies([
self._assert_correct_number_of_anchors( self._assert_correct_number_of_anchors(
anchors, feature_map_shape_list)]): anchors_list, feature_map_shape_list)]):
anchors.set(tf.identity(anchors.get())) for item in anchors_list:
return anchors item.set(tf.identity(item.get()))
return anchors_list
@abstractmethod @abstractmethod
def _generate(self, feature_map_shape_list, **params): def _generate(self, feature_map_shape_list, **params):
...@@ -117,15 +120,17 @@ class AnchorGenerator(object): ...@@ -117,15 +120,17 @@ class AnchorGenerator(object):
**params: parameters for anchor generation op **params: parameters for anchor generation op
Returns: Returns:
boxes: a BoxList holding a collection of N anchor boxes boxes_list: a list of BoxList, each holding a collection of N anchor
boxes.
""" """
pass pass
def _assert_correct_number_of_anchors(self, anchors, feature_map_shape_list): def _assert_correct_number_of_anchors(self, anchors_list,
feature_map_shape_list):
"""Assert that correct number of anchors was generated. """Assert that correct number of anchors was generated.
Args: Args:
anchors: box_list.BoxList object holding anchors generated anchors_list: A list of box_list.BoxList object holding anchors generated.
feature_map_shape_list: list of (height, width) pairs in the format feature_map_shape_list: list of (height, width) pairs in the format
[(height_0, width_0), (height_1, width_1), ...] that the generated [(height_0, width_0), (height_1, width_1), ...] that the generated
anchors must align with. anchors must align with.
...@@ -134,10 +139,12 @@ class AnchorGenerator(object): ...@@ -134,10 +139,12 @@ class AnchorGenerator(object):
match the number of expected anchors. match the number of expected anchors.
""" """
expected_num_anchors = 0 expected_num_anchors = 0
for num_anchors_per_location, feature_map_shape in zip( actual_num_anchors = 0
self.num_anchors_per_location(), feature_map_shape_list): for num_anchors_per_location, feature_map_shape, anchors in zip(
self.num_anchors_per_location(), feature_map_shape_list, anchors_list):
expected_num_anchors += (num_anchors_per_location expected_num_anchors += (num_anchors_per_location
* feature_map_shape[0] * feature_map_shape[0]
* feature_map_shape[1]) * feature_map_shape[1])
return tf.assert_equal(expected_num_anchors, anchors.num_boxes()) actual_num_anchors += anchors.num_boxes()
return tf.assert_equal(expected_num_anchors, actual_num_anchors)
...@@ -585,7 +585,7 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None): ...@@ -585,7 +585,7 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
['Incorrect field size: actual vs expected.', num_entries, num_boxes]) ['Incorrect field size: actual vs expected.', num_entries, num_boxes])
with tf.control_dependencies([length_assert]): with tf.control_dependencies([length_assert]):
# TODO: Remove with tf.device when top_k operation runs # TODO(derekjchow): Remove with tf.device when top_k operation runs
# correctly on GPU. # correctly on GPU.
with tf.device('/cpu:0'): with tf.device('/cpu:0'):
_, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True) _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)
...@@ -657,7 +657,7 @@ def filter_greater_than(boxlist, thresh, scope=None): ...@@ -657,7 +657,7 @@ def filter_greater_than(boxlist, thresh, scope=None):
This op keeps the collection of boxes whose corresponding scores are This op keeps the collection of boxes whose corresponding scores are
greater than the input threshold. greater than the input threshold.
TODO: Change function name to filter_scores_greater_than TODO(jonathanhuang): Change function name to filter_scores_greater_than
Args: Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field boxlist: BoxList holding N boxes. Must contain a 'scores' field
...@@ -937,7 +937,7 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): ...@@ -937,7 +937,7 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
iou_ = iou(selected_boxes, pool_boxes) iou_ = iou(selected_boxes, pool_boxes)
match_indicator = tf.to_float(tf.greater(iou_, iou_thresh)) match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
num_matches = tf.reduce_sum(match_indicator, 1) num_matches = tf.reduce_sum(match_indicator, 1)
# TODO: Handle the case where some boxes in selected_boxes do not # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
# match to any boxes in pool_boxes. For such boxes without any matches, we # match to any boxes in pool_boxes. For such boxes without any matches, we
# should return the original boxes without voting. # should return the original boxes without voting.
match_assert = tf.Assert( match_assert = tf.Assert(
......
...@@ -738,12 +738,9 @@ class NonMaxSuppressionTest(tf.test.TestCase): ...@@ -738,12 +738,9 @@ class NonMaxSuppressionTest(tf.test.TestCase):
boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5])) boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5]))
iou_thresh = .5 iou_thresh = .5
max_output_size = 3 max_output_size = 3
nms = box_list_ops.non_max_suppression( with self.assertRaisesWithPredicateMatch(ValueError,
boxes, iou_thresh, max_output_size) 'Dimensions must be equal'):
with self.test_session() as sess: box_list_ops.non_max_suppression(boxes, iou_thresh, max_output_size)
with self.assertRaisesWithPredicateMatch(
errors.InvalidArgumentError, 'scores has incompatible shape'):
sess.run(nms.get())
def test_select_from_three_clusters(self): def test_select_from_three_clusters(self):
corners = tf.constant([[0, 0, 1, 1], corners = tf.constant([[0, 0, 1, 1],
......
...@@ -64,11 +64,9 @@ class BoxPredictor(object): ...@@ -64,11 +64,9 @@ class BoxPredictor(object):
scope=None, **params): scope=None, **params):
"""Computes encoded object locations and corresponding confidences. """Computes encoded object locations and corresponding confidences.
Takes a high level image feature map as input and produce two predictions, Takes a list of high level image feature maps as input and produces a list
(1) a tensor encoding box locations, and of box encodings and a list of class scores where each element in the output
(2) a tensor encoding class scores for each corresponding box. lists correspond to the feature maps in the input list.
In this interface, we only assume that two tensors are returned as output
and do not assume anything about their shapes.
Args: Args:
image_features: A list of float tensors of shape [batch_size, height_i, image_features: A list of float tensors of shape [batch_size, height_i,
...@@ -81,12 +79,14 @@ class BoxPredictor(object): ...@@ -81,12 +79,14 @@ class BoxPredictor(object):
Returns: Returns:
A dictionary containing at least the following tensors. A dictionary containing at least the following tensors.
box_encodings: A float tensor of shape box_encodings: A list of float tensors of shape
[batch_size, num_anchors, q, code_size] representing the location of [batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. the objects, where q is 1 or the number of classes. Each entry in the
class_predictions_with_background: A float tensor of shape list corresponds to a feature map in the input `image_features` list.
[batch_size, num_anchors, num_classes + 1] representing the class class_predictions_with_background: A list of float tensors of shape
predictions for the proposals. [batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
Raises: Raises:
ValueError: If length of `image_features` is not equal to length of ValueError: If length of `image_features` is not equal to length of
...@@ -104,7 +104,7 @@ class BoxPredictor(object): ...@@ -104,7 +104,7 @@ class BoxPredictor(object):
return self._predict(image_features, num_predictions_per_location, return self._predict(image_features, num_predictions_per_location,
**params) **params)
# TODO: num_predictions_per_location could be moved to constructor. # TODO(rathodv): num_predictions_per_location could be moved to constructor.
# This is currently only used by ConvolutionalBoxPredictor. # This is currently only used by ConvolutionalBoxPredictor.
@abstractmethod @abstractmethod
def _predict(self, image_features, num_predictions_per_location, **params): def _predict(self, image_features, num_predictions_per_location, **params):
...@@ -120,12 +120,14 @@ class BoxPredictor(object): ...@@ -120,12 +120,14 @@ class BoxPredictor(object):
Returns: Returns:
A dictionary containing at least the following tensors. A dictionary containing at least the following tensors.
box_encodings: A float tensor of shape box_encodings: A list of float tensors of shape
[batch_size, num_anchors, q, code_size] representing the location of [batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. the objects, where q is 1 or the number of classes. Each entry in the
class_predictions_with_background: A float tensor of shape list corresponds to a feature map in the input `image_features` list.
[batch_size, num_anchors, num_classes + 1] representing the class class_predictions_with_background: A list of float tensors of shape
predictions for the proposals. [batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
""" """
pass pass
...@@ -133,7 +135,7 @@ class BoxPredictor(object): ...@@ -133,7 +135,7 @@ class BoxPredictor(object):
class RfcnBoxPredictor(BoxPredictor): class RfcnBoxPredictor(BoxPredictor):
"""RFCN Box Predictor. """RFCN Box Predictor.
Applies a position sensitve ROI pooling on position sensitive feature maps to Applies a position sensitive ROI pooling on position sensitive feature maps to
predict classes and refined locations. See https://arxiv.org/abs/1605.06409 predict classes and refined locations. See https://arxiv.org/abs/1605.06409
for details. for details.
...@@ -191,12 +193,14 @@ class RfcnBoxPredictor(BoxPredictor): ...@@ -191,12 +193,14 @@ class RfcnBoxPredictor(BoxPredictor):
box_code_size]. box_code_size].
Returns: Returns:
box_encodings: A float tensor of shape box_encodings: A list of float tensors of shape
[batch_size, num_anchors, num_classes, code_size] representing the [batch_size, num_anchors_i, q, code_size] representing the location of
location of the objects. the objects, where q is 1 or the number of classes. Each entry in the
class_predictions_with_background: A float tensor of shape list corresponds to a feature map in the input `image_features` list.
[batch_size, num_anchors, num_classes + 1] representing the class class_predictions_with_background: A list of float tensors of shape
predictions for the proposals. [batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
Raises: Raises:
ValueError: if num_predictions_per_location is not 1 or if ValueError: if num_predictions_per_location is not 1 or if
...@@ -266,11 +270,12 @@ class RfcnBoxPredictor(BoxPredictor): ...@@ -266,11 +270,12 @@ class RfcnBoxPredictor(BoxPredictor):
class_predictions_with_background, class_predictions_with_background,
[batch_size * num_boxes, 1, total_classes]) [batch_size * num_boxes, 1, total_classes])
return {BOX_ENCODINGS: box_encodings, return {BOX_ENCODINGS: [box_encodings],
CLASS_PREDICTIONS_WITH_BACKGROUND: CLASS_PREDICTIONS_WITH_BACKGROUND:
class_predictions_with_background} [class_predictions_with_background]}
# TODO(rathodv): Change the implementation to return lists of predictions.
class MaskRCNNBoxPredictor(BoxPredictor): class MaskRCNNBoxPredictor(BoxPredictor):
"""Mask R-CNN Box Predictor. """Mask R-CNN Box Predictor.
...@@ -644,18 +649,18 @@ class ConvolutionalBoxPredictor(BoxPredictor): ...@@ -644,18 +649,18 @@ class ConvolutionalBoxPredictor(BoxPredictor):
feature map. feature map.
Returns: Returns:
A dictionary containing the following tensors. box_encodings: A list of float tensors of shape
box_encodings: A float tensor of shape [batch_size, num_anchors, 1, [batch_size, num_anchors_i, q, code_size] representing the location of
code_size] representing the location of the objects, where the objects, where q is 1 or the number of classes. Each entry in the
num_anchors = feat_height * feat_width * num_predictions_per_location list corresponds to a feature map in the input `image_features` list.
class_predictions_with_background: A float tensor of shape class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors, num_classes + 1] representing the class [batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
""" """
box_encodings_list = [] box_encodings_list = []
class_predictions_list = [] class_predictions_list = []
# TODO: Come up with a better way to generate scope names # TODO(rathodv): Come up with a better way to generate scope names
# in box predictor once we have time to retrain all models in the zoo. # in box predictor once we have time to retrain all models in the zoo.
# The following lines create scope names to be backwards compatible with the # The following lines create scope names to be backwards compatible with the
# existing checkpoints. # existing checkpoints.
...@@ -741,12 +746,13 @@ class ConvolutionalBoxPredictor(BoxPredictor): ...@@ -741,12 +746,13 @@ class ConvolutionalBoxPredictor(BoxPredictor):
num_predictions_per_location, num_predictions_per_location,
num_class_slots])) num_class_slots]))
class_predictions_list.append(class_predictions_with_background) class_predictions_list.append(class_predictions_with_background)
return {BOX_ENCODINGS: tf.concat(box_encodings_list, axis=1), return {
CLASS_PREDICTIONS_WITH_BACKGROUND: BOX_ENCODINGS: box_encodings_list,
tf.concat(class_predictions_list, axis=1)} CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
}
# TODO: Merge the implementation with ConvolutionalBoxPredictor above # TODO(rathodv): Merge the implementation with ConvolutionalBoxPredictor above
# since they are very similar. # since they are very similar.
class WeightSharedConvolutionalBoxPredictor(BoxPredictor): class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
"""Convolutional Box Predictor with weight sharing. """Convolutional Box Predictor with weight sharing.
...@@ -806,13 +812,14 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor): ...@@ -806,13 +812,14 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
shared. shared.
Returns: Returns:
A dictionary containing the following tensors. box_encodings: A list of float tensors of shape
box_encodings: A float tensor of shape [batch_size, num_anchors, 1, [batch_size, num_anchors_i, q, code_size] representing the location of
code_size] representing the location of the objects, where the objects, where q is 1 or the number of classes. Each entry in the
num_anchors = feat_height * feat_width * num_predictions_per_location list corresponds to a feature map in the input `image_features` list.
class_predictions_with_background: A float tensor of shape class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors, num_classes + 1] representing the class [batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
Raises: Raises:
ValueError: If the image feature maps do not have the same number of ValueError: If the image feature maps do not have the same number of
...@@ -890,6 +897,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor): ...@@ -890,6 +897,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
num_predictions_per_location, num_predictions_per_location,
num_class_slots])) num_class_slots]))
class_predictions_list.append(class_predictions_with_background) class_predictions_list.append(class_predictions_with_background)
return {BOX_ENCODINGS: tf.concat(box_encodings_list, axis=1), return {
CLASS_PREDICTIONS_WITH_BACKGROUND: BOX_ENCODINGS: box_encodings_list,
tf.concat(class_predictions_list, axis=1)} CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_list
}
...@@ -165,9 +165,11 @@ class RfcnBoxPredictorTest(tf.test.TestCase): ...@@ -165,9 +165,11 @@ class RfcnBoxPredictorTest(tf.test.TestCase):
[image_features], num_predictions_per_location=[1], [image_features], num_predictions_per_location=[1],
scope='BoxPredictor', scope='BoxPredictor',
proposal_boxes=proposal_boxes) proposal_boxes=proposal_boxes)
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
class_predictions_with_background = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer() init_op = tf.global_variables_initializer()
with self.test_session() as sess: with self.test_session() as sess:
...@@ -215,9 +217,11 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -215,9 +217,11 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
box_predictions = conv_box_predictor.predict( box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5], [image_features], num_predictions_per_location=[5],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
objectness_predictions = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, objectness_predictions) return (box_encodings, objectness_predictions)
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, objectness_predictions) = self.execute(graph_fn, (box_encodings, objectness_predictions) = self.execute(graph_fn,
...@@ -242,9 +246,10 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -242,9 +246,10 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
box_predictions = conv_box_predictor.predict( box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[1], [image_features], num_predictions_per_location=[1],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
objectness_predictions = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] objectness_predictions = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
return (box_encodings, objectness_predictions) return (box_encodings, objectness_predictions)
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, objectness_predictions) = self.execute(graph_fn, (box_encodings, objectness_predictions) = self.execute(graph_fn,
...@@ -273,9 +278,11 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -273,9 +278,11 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
[image_features], [image_features],
num_predictions_per_location=[5], num_predictions_per_location=[5],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
class_predictions_with_background = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background) return (box_encodings, class_predictions_with_background)
(box_encodings, (box_encodings,
class_predictions_with_background) = self.execute(graph_fn, class_predictions_with_background) = self.execute(graph_fn,
...@@ -302,9 +309,11 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -302,9 +309,11 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
box_predictions = conv_box_predictor.predict( box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5], [image_features], num_predictions_per_location=[5],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
objectness_predictions = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer() init_op = tf.global_variables_initializer()
resolution = 32 resolution = 32
...@@ -348,9 +357,11 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -348,9 +357,11 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
box_predictions = conv_box_predictor.predict( box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5], [image_features], num_predictions_per_location=[5],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
objectness_predictions = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] objectness_predictions = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
init_op = tf.global_variables_initializer() init_op = tf.global_variables_initializer()
resolution = 32 resolution = 32
...@@ -412,9 +423,10 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -412,9 +423,10 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
box_predictions = conv_box_predictor.predict( box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5], [image_features], num_predictions_per_location=[5],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
objectness_predictions = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] objectness_predictions = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
return (box_encodings, objectness_predictions) return (box_encodings, objectness_predictions)
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, objectness_predictions) = self.execute( (box_encodings, objectness_predictions) = self.execute(
...@@ -438,9 +450,10 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -438,9 +450,10 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
[image_features], [image_features],
num_predictions_per_location=[5], num_predictions_per_location=[5],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
class_predictions_with_background = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] class_predictions_with_background = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
return (box_encodings, class_predictions_with_background) return (box_encodings, class_predictions_with_background)
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
...@@ -466,9 +479,11 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -466,9 +479,11 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
[image_features1, image_features2], [image_features1, image_features2],
num_predictions_per_location=[5, 5], num_predictions_per_location=[5, 5],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
class_predictions_with_background = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background) return (box_encodings, class_predictions_with_background)
image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32)
...@@ -493,9 +508,11 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -493,9 +508,11 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
[image_features1, image_features2], [image_features1, image_features2],
num_predictions_per_location=[5, 5], num_predictions_per_location=[5, 5],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(
class_predictions_with_background = box_predictions[ box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background) return (box_encodings, class_predictions_with_background)
with self.test_session(graph=tf.Graph()): with self.test_session(graph=tf.Graph()):
...@@ -543,9 +560,10 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): ...@@ -543,9 +560,10 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
box_predictions = conv_box_predictor.predict( box_predictions = conv_box_predictor.predict(
[image_features], num_predictions_per_location=[5], [image_features], num_predictions_per_location=[5],
scope='BoxPredictor') scope='BoxPredictor')
box_encodings = box_predictions[box_predictor.BOX_ENCODINGS] box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS],
objectness_predictions = box_predictions[ axis=1)
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND] objectness_predictions = tf.concat(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
init_op = tf.global_variables_initializer() init_op = tf.global_variables_initializer()
resolution = 32 resolution = 32
......
...@@ -116,14 +116,23 @@ class WeightedL2LocalizationLoss(Loss): ...@@ -116,14 +116,23 @@ class WeightedL2LocalizationLoss(Loss):
class WeightedSmoothL1LocalizationLoss(Loss): class WeightedSmoothL1LocalizationLoss(Loss):
"""Smooth L1 localization loss function. """Smooth L1 localization loss function aka Huber Loss..
The smooth L1_loss is defined elementwise as .5 x^2 if |x|<1 and |x|-.5 The smooth L1_loss is defined elementwise as .5 x^2 if |x| <= delta and
otherwise, where x is the difference between predictions and target. 0.5 x^2 + delta * (|x|-delta) otherwise, where x is the difference between
predictions and target.
See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015) See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
""" """
def __init__(self, delta=1.0):
"""Constructor.
Args:
delta: delta for smooth L1 loss.
"""
self._delta = delta
def _compute_loss(self, prediction_tensor, target_tensor, weights): def _compute_loss(self, prediction_tensor, target_tensor, weights):
"""Compute loss function. """Compute loss function.
...@@ -138,13 +147,14 @@ class WeightedSmoothL1LocalizationLoss(Loss): ...@@ -138,13 +147,14 @@ class WeightedSmoothL1LocalizationLoss(Loss):
loss: a float tensor of shape [batch_size, num_anchors] tensor loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function. representing the value of the loss function.
""" """
diff = prediction_tensor - target_tensor return tf.reduce_sum(tf.losses.huber_loss(
abs_diff = tf.abs(diff) target_tensor,
abs_diff_lt_1 = tf.less(abs_diff, 1) prediction_tensor,
anchorwise_smooth_l1norm = tf.reduce_sum( delta=self._delta,
tf.where(abs_diff_lt_1, 0.5 * tf.square(abs_diff), abs_diff - 0.5), weights=tf.expand_dims(weights, axis=2),
2) * weights loss_collection=None,
return anchorwise_smooth_l1norm reduction=tf.losses.Reduction.NONE
), axis=2)
class WeightedIOULocalizationLoss(Loss): class WeightedIOULocalizationLoss(Loss):
......
...@@ -545,7 +545,7 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase): ...@@ -545,7 +545,7 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
def testReturnsCorrectAnchorWiseLossWithHighLogitScaleSetting(self): def testReturnsCorrectAnchorWiseLossWithHighLogitScaleSetting(self):
"""At very high logit_scale, all predictions will be ~0.33.""" """At very high logit_scale, all predictions will be ~0.33."""
# TODO: Also test logit_scale with anchorwise=False. # TODO(yonib): Also test logit_scale with anchorwise=False.
logit_scale = 10e16 logit_scale = 10e16
prediction_tensor = tf.constant([[[-100, 100, -100], prediction_tensor = tf.constant([[[-100, 100, -100],
[100, -100, -100], [100, -100, -100],
......
...@@ -204,7 +204,7 @@ def _random_integer(minval, maxval, seed): ...@@ -204,7 +204,7 @@ def _random_integer(minval, maxval, seed):
[], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed) [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
# TODO: This method is needed because the current # TODO(mttang): This method is needed because the current
# tf.image.rgb_to_grayscale method does not support quantization. Replace with # tf.image.rgb_to_grayscale method does not support quantization. Replace with
# tf.image.rgb_to_grayscale after quantization support is added. # tf.image.rgb_to_grayscale after quantization support is added.
def _rgb_to_grayscale(images, name=None): def _rgb_to_grayscale(images, name=None):
...@@ -2140,7 +2140,7 @@ def resize_to_range(image, ...@@ -2140,7 +2140,7 @@ def resize_to_range(image,
return result return result
# TODO: Make sure the static shapes are preserved. # TODO(alirezafathi): Make sure the static shapes are preserved.
def resize_to_min_dimension(image, masks=None, min_dimension=600): def resize_to_min_dimension(image, masks=None, min_dimension=600):
"""Resizes image and masks given the min size maintaining the aspect ratio. """Resizes image and masks given the min size maintaining the aspect ratio.
...@@ -2226,7 +2226,7 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None): ...@@ -2226,7 +2226,7 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
return tuple(result) return tuple(result)
# TODO: Investigate if instead the function should return None if # TODO(alirezafathi): Investigate if instead the function should return None if
# masks is None. # masks is None.
# pylint: disable=g-doc-return-or-yield # pylint: disable=g-doc-return-or-yield
def resize_image(image, def resize_image(image,
......
...@@ -324,7 +324,7 @@ class TargetAssigner(object): ...@@ -324,7 +324,7 @@ class TargetAssigner(object):
return self._box_coder return self._box_coder
# TODO: This method pulls in all the implementation dependencies into # TODO(rathodv): This method pulls in all the implementation dependencies into
# core. Therefore its best to have this factory method outside of core. # core. Therefore its best to have this factory method outside of core.
def create_target_assigner(reference, stage=None, def create_target_assigner(reference, stage=None,
negative_class_weight=1.0, negative_class_weight=1.0,
......
...@@ -836,7 +836,7 @@ class CreateTargetAssignerTest(tf.test.TestCase): ...@@ -836,7 +836,7 @@ class CreateTargetAssignerTest(tf.test.TestCase):
def test_create_target_assigner(self): def test_create_target_assigner(self):
"""Tests that named constructor gives working target assigners. """Tests that named constructor gives working target assigners.
TODO: Make this test more general. TODO(rathodv): Make this test more general.
""" """
corners = [[0.0, 0.0, 1.0, 1.0]] corners = [[0.0, 0.0, 1.0, 1.0]]
groundtruth = box_list.BoxList(tf.constant(corners)) groundtruth = box_list.BoxList(tf.constant(corners))
......
# Bazel BUILD file for the object detection sample-data package.
package(
    default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Expose the pet dataset label map so other packages can depend on it as data.
exports_files([
    "pet_label_map.pbtxt",
])
# Tensorflow Object Detection API: data decoders.
package(
    default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
# Library that decodes serialized tf.Example protos into model input tensors.
py_library(
    name = "tf_example_decoder",
    srcs = ["tf_example_decoder.py"],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/core:data_decoder",
        "//tensorflow/models/research/object_detection/core:standard_fields",
        "//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
        "//tensorflow/models/research/object_detection/utils:label_map_util",
    ],
)
# Unit tests for the decoder library above.
py_test(
    name = "tf_example_decoder_test",
    srcs = ["tf_example_decoder_test.py"],
    deps = [
        ":tf_example_decoder",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/core:standard_fields",
        "//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
    ],
)
...@@ -20,6 +20,9 @@ protos for object detection. ...@@ -20,6 +20,9 @@ protos for object detection.
""" """
import tensorflow as tf import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from object_detection.core import data_decoder from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2 from object_detection.protos import input_reader_pb2
...@@ -28,6 +31,40 @@ from object_detection.utils import label_map_util ...@@ -28,6 +31,40 @@ from object_detection.utils import label_map_util
slim_example_decoder = tf.contrib.slim.tfexample_decoder slim_example_decoder = tf.contrib.slim.tfexample_decoder
class BackupHandler(slim_example_decoder.ItemHandler):
  """ItemHandler that consults a primary handler, then a fallback.

  The primary handler's decoded tensor is returned unless it contains no
  elements, in which case the backup handler's result is used instead.
  """

  def __init__(self, handler, backup):
    """Creates a BackupHandler from a primary and a backup ItemHandler.

    Args:
      handler: The primary ItemHandler.
      backup: The backup ItemHandler.

    Raises:
      ValueError: if either is not an ItemHandler.
    """
    if not isinstance(handler, slim_example_decoder.ItemHandler):
      raise ValueError('Primary handler is of type %s instead of ItemHandler' %
                       type(handler))
    if not isinstance(backup, slim_example_decoder.ItemHandler):
      raise ValueError(
          'Backup handler is of type %s instead of ItemHandler' % type(backup))
    # This handler advertises the union of both handlers' feature keys.
    super(BackupHandler, self).__init__(handler.keys + backup.keys)
    self._handler = handler
    self._backup = backup

  def tensors_to_item(self, keys_to_tensors):
    """Decodes with the primary handler, falling back when its result is empty."""
    primary_item = self._handler.tensors_to_item(keys_to_tensors)
    # The product of the shape is zero exactly when the primary result holds
    # no elements (some dimension is zero).
    primary_is_empty = math_ops.equal(
        math_ops.reduce_prod(array_ops.shape(primary_item)), 0)
    return control_flow_ops.cond(
        pred=primary_is_empty,
        true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors),
        false_fn=lambda: primary_item)
class TfExampleDecoder(data_decoder.DataDecoder): class TfExampleDecoder(data_decoder.DataDecoder):
"""Tensorflow Example proto decoder.""" """Tensorflow Example proto decoder."""
...@@ -101,13 +138,18 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -101,13 +138,18 @@ class TfExampleDecoder(data_decoder.DataDecoder):
'image/object/weight': 'image/object/weight':
tf.VarLenFeature(tf.float32), tf.VarLenFeature(tf.float32),
} }
if dct_method:
image = slim_example_decoder.Image(
image_key='image/encoded',
format_key='image/format',
channels=3,
dct_method=dct_method)
else:
image = slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3)
self.items_to_handlers = { self.items_to_handlers = {
fields.InputDataFields.image: fields.InputDataFields.image:
slim_example_decoder.Image( image,
image_key='image/encoded',
format_key='image/format',
channels=3,
dct_method=dct_method),
fields.InputDataFields.source_id: ( fields.InputDataFields.source_id: (
slim_example_decoder.Tensor('image/source_id')), slim_example_decoder.Tensor('image/source_id')),
fields.InputDataFields.key: ( fields.InputDataFields.key: (
...@@ -160,7 +202,11 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -160,7 +202,11 @@ class TfExampleDecoder(data_decoder.DataDecoder):
default_value=-1) default_value=-1)
# If the label_map_proto is provided, try to use it in conjunction with # If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID. # the class text, and fall back to a materialized ID.
label_handler = slim_example_decoder.BackupHandler( # TODO(lzc): note that here we are using BackupHandler defined in this
# file(which is branching slim_example_decoder.BackupHandler). Need to
# switch back to slim_example_decoder.BackupHandler once tf 1.5 becomes
# more popular.
label_handler = BackupHandler(
slim_example_decoder.LookupTensor( slim_example_decoder.LookupTensor(
'image/object/class/text', table, default_value=''), 'image/object/class/text', table, default_value=''),
slim_example_decoder.Tensor('image/object/class/label')) slim_example_decoder.Tensor('image/object/class/label'))
......
...@@ -19,10 +19,19 @@ import os ...@@ -19,10 +19,19 @@ import os
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import parsing_ops
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder from object_detection.data_decoders import tf_example_decoder
from object_detection.protos import input_reader_pb2 from object_detection.protos import input_reader_pb2
slim_example_decoder = tf.contrib.slim.tfexample_decoder
class TfExampleDecoderTest(tf.test.TestCase): class TfExampleDecoderTest(tf.test.TestCase):
...@@ -57,6 +66,68 @@ class TfExampleDecoderTest(tf.test.TestCase): ...@@ -57,6 +66,68 @@ class TfExampleDecoderTest(tf.test.TestCase):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _Int64FeatureFromList(self, ndarray):
return feature_pb2.Feature(
int64_list=feature_pb2.Int64List(value=ndarray.flatten().tolist()))
def _BytesFeatureFromList(self, ndarray):
values = ndarray.flatten().tolist()
for i in range(len(values)):
values[i] = values[i].encode('utf-8')
return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=values))
  def testDecodeExampleWithBranchedBackupHandler(self):
    """Checks BackupHandler uses the primary tensor, falling back when absent.

    Three Examples are decoded: one with both label and text features (primary
    label wins), one with only text (backup lookup via the class-text table is
    used), and one with only the label (primary used, backup never consulted).
    """
    # Example with both the numeric label and the class text present.
    example1 = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                'image/object/class/text':
                    self._BytesFeatureFromList(
                        np.array(['cat', 'dog', 'guinea pig'])),
                'image/object/class/label':
                    self._Int64FeatureFromList(np.array([42, 10, 900]))
            }))
    # Example with only the class text; the label tensor decodes empty, so the
    # backup (text -> id lookup) path must be taken.
    example2 = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                'image/object/class/text':
                    self._BytesFeatureFromList(
                        np.array(['cat', 'dog', 'guinea pig'])),
            }))
    # Example with only the numeric label.
    example3 = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                'image/object/class/label':
                    self._Int64FeatureFromList(np.array([42, 10, 901]))
            }))
    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
    table = lookup_ops.index_table_from_tensor(
        constant_op.constant(['dog', 'guinea pig', 'cat']))
    keys_to_features = {
        'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
        'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
    }
    # Primary: raw int64 labels; backup: ids looked up from class text.
    backup_handler = tf_example_decoder.BackupHandler(
        handler=slim_example_decoder.Tensor('image/object/class/label'),
        backup=slim_example_decoder.LookupTensor('image/object/class/text',
                                                 table))
    items_to_handlers = {
        'labels': backup_handler,
    }
    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
                                                    items_to_handlers)
    obtained_class_ids_each_example = []
    with self.test_session() as sess:
      # The lookup table must be initialized before any decode runs.
      sess.run(lookup_ops.tables_initializer())
      for example in [example1, example2, example3]:
        serialized_example = array_ops.reshape(
            example.SerializeToString(), shape=[])
        obtained_class_ids_each_example.append(
            decoder.decode(serialized_example)[0].eval())
    # example1: primary labels used verbatim.
    self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
    # example2: backup lookup applied (table ids, per the mapping above).
    self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
    # example3: primary labels used; backup never consulted.
    self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])
def testDecodeJpegImage(self): def testDecodeJpegImage(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor) encoded_jpeg = self._EncodeImage(image_tensor)
......
# Tensorflow Object Detection API: dataset tools.
package(
    default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
# Converts COCO annotations into the TFRecord format consumed by training.
py_binary(
    name = "create_coco_tf_record",
    srcs = [
        "create_coco_tf_record.py",
    ],
    deps = [
        "//PIL:pil",
        "//pycocotools",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:dataset_util",
        "//tensorflow/models/research/object_detection/utils:label_map_util",
    ],
)
py_test(
    name = "create_coco_tf_record_test",
    srcs = [
        "create_coco_tf_record_test.py",
    ],
    deps = [
        ":create_coco_tf_record",
        "//tensorflow",
    ],
)
# Converts KITTI annotations into TFRecords.
py_binary(
    name = "create_kitti_tf_record",
    srcs = [
        "create_kitti_tf_record.py",
    ],
    deps = [
        "//PIL:pil",
        "//lxml",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:dataset_util",
        "//tensorflow/models/research/object_detection/utils:label_map_util",
        "//tensorflow/models/research/object_detection/utils:np_box_ops",
    ],
)
py_test(
    name = "create_kitti_tf_record_test",
    srcs = [
        "create_kitti_tf_record_test.py",
    ],
    deps = [
        ":create_kitti_tf_record",
        "//tensorflow",
    ],
)
# Converts PASCAL VOC XML annotations into TFRecords.
py_binary(
    name = "create_pascal_tf_record",
    srcs = [
        "create_pascal_tf_record.py",
    ],
    deps = [
        "//PIL:pil",
        "//lxml",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:dataset_util",
        "//tensorflow/models/research/object_detection/utils:label_map_util",
    ],
)
py_test(
    name = "create_pascal_tf_record_test",
    srcs = [
        "create_pascal_tf_record_test.py",
    ],
    deps = [
        ":create_pascal_tf_record",
        "//tensorflow",
    ],
)
# Converts the Oxford-IIIT Pet dataset into TFRecords.
py_binary(
    name = "create_pet_tf_record",
    srcs = [
        "create_pet_tf_record.py",
    ],
    deps = [
        "//PIL:pil",
        "//lxml",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:dataset_util",
        "//tensorflow/models/research/object_detection/utils:label_map_util",
    ],
)
# Shared helpers for building Open Images TFRecords; used by the binary below.
py_library(
    name = "oid_tfrecord_creation",
    srcs = ["oid_tfrecord_creation.py"],
    deps = [
        "//tensorflow",
        "//tensorflow/models/research/object_detection/core:standard_fields",
        "//tensorflow/models/research/object_detection/utils:dataset_util",
    ],
)
py_test(
    name = "oid_tfrecord_creation_test",
    srcs = ["oid_tfrecord_creation_test.py"],
    deps = [
        ":oid_tfrecord_creation",
        "//contextlib2",
        "//pandas",
        "//tensorflow",
    ],
)
# Converts Open Images annotations into TFRecords.
py_binary(
    name = "create_oid_tf_record",
    srcs = ["create_oid_tf_record.py"],
    deps = [
        ":oid_tfrecord_creation",
        "//contextlib2",
        "//pandas",
        "//tensorflow",
        "//tensorflow/models/research/object_detection/utils:label_map_util",
    ],
)
...@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use, ...@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
# Filter all bounding boxes of this frame that are of a legal class, and # Filter all bounding boxes of this frame that are of a legal class, and
# don't overlap with a dontcare region. # don't overlap with a dontcare region.
# TODO filter out targets that are truncated or heavily occluded. # TODO(talremez) filter out targets that are truncated or heavily occluded.
annotation_for_image = filter_annotations(img_anno, classes_to_use) annotation_for_image = filter_annotations(img_anno, classes_to_use)
example = prepare_example(image_path, annotation_for_image, label_map_dict) example = prepare_example(image_path, annotation_for_image, label_map_dict)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment